def run(self) -> float:
    if self._complete.is_set():
        raise StopIteration("This runner has already been used")
    if self._running:
        raise StopIteration("This runner is being executed")
    self._running = True

    if (self._threads != 1 or self._processes != 1) and self._optimize_workers:
        # Time a single call to decide whether parallel workers are worth it.
        t = time.time()
        result = self._function(*next(self._raw_function_arguments))
        time_spent = time.time() - t
        if self._check_function(result):
            self._success_function(result)
        if time_spent < self.__speed_reference:
            self._threads = 1
            self._processes = 1
            self._function_arguments = self._raw_function_arguments
    else:
        self._function_arguments = self._raw_function_arguments

    if self._threads == self._processes and self._threads == 1:
        # Purely sequential execution.
        self._function_arguments: collections.Iterable
        start = time.time()
        for args in self._function_arguments:
            output = self._function(*args)
            if self._check_function(output):
                self._success_function(output)
        return time.time() - start

    self._function_arguments = Queue(self._raw_function_arguments)

    if self._processes == 1 or self._threads == 1:
        # Single worker dimension: run the worker loop in this process.
        if self._processes > self._threads:
            self._threads = self._processes
        self._blocking_success = True
        start = time.time()
        self._process_worker()
        return time.time() - start

    self._blocking_success = False
    self._success_sync_queue = multiprocessing.Queue()
    sync_thread = threading.Thread(target=self._sync_success)
    sync_thread.start()

    if any(platform in sys.platform for platform in ("win", "ios")) or self._processes_as_threads:
        process_pool = multiprocessing.pool.ThreadPool
    else:
        process_pool = multiprocessing.pool.Pool

    start = time.time()
    pool = process_pool(processes=self._processes)
    pool.imap_unordered(lambda f: f(),
                        (self._process_worker for _ in range(self._processes)),
                        chunksize=self._processes)
    pool.close()
    pool.join()
    pool.terminate()

    self._complete.set()
    self._function_arguments.stop()
    self._function_arguments.join()
    sync_thread.join()
    self._running = False
    return time.time() - start
def wrapper(iterable, *args, **kwargs):
    def starfunc(iterable):
        return func(iterable, *args, **kwargs)

    try:
        iter(iterable)
    except TypeError:
        return func(iterable, *args, **kwargs)

    if thread_count is None:
        current_thread_count = MAX_THREADS
    else:
        current_thread_count = set_threads(thread_count, set_global=False)

    with multiprocessing.pool.ThreadPool(current_thread_count) as pool:
        if return_results:
            results = []
            for result in progress_callback(
                    pool.imap(starfunc, iterable),
                    total=len(iterable),
                    include_progress_callback=include_progress_callback):
                results.append(result)
            return results
        else:
            for result in progress_callback(
                    pool.imap_unordered(starfunc, iterable),
                    total=len(iterable),
                    include_progress_callback=include_progress_callback):
                pass
def _push(self, src, dst):
    """
    Push src to dst on the remote.
    """
    force = False
    if src.startswith('+'):
        src = src[1:]
        force = True
    present = [self._refs[name][1] for name in self._refs]
    present.extend(self._pushed.values())
    # before updating the ref, write all objects that are referenced
    objects = git_list_objects(src, present)
    try:
        # upload objects in parallel
        pool = multiprocessing.pool.ThreadPool(processes=self._processes)
        res = pool.imap_unordered(Binder(self, '_put_object'), objects)
        # show progress
        total = len(objects)
        self._trace('', level=Level.INFO, exact=True)
        for done, _ in enumerate(res, 1):
            pct = float(done) / total
            message = '\rWriting objects: {:4.0%} ({}/{})'.format(pct, done, total)
            if done == total:
                message = '%s, done.\n' % message
            self._trace(message, level=Level.INFO, exact=True)
    except Exception:
        self._fatal('exception while writing objects')
    sha = git_ref_value(src)
    error = self._write_ref(sha, dst, force)
    if error is None:
        self._write('ok %s' % dst)
        self._pushed[dst] = sha
    else:
        self._write('error %s %s' % (dst, error))
def get_started_finished(gcs_client, db, todo):
    """Download started/finished.json from build dirs in todo."""
    ack_ids = []
    build_dirs = []
    pool = multiprocessing.pool.ThreadPool(16)
    try:
        for ack_id, (build_dir, started, finished) in pool.imap_unordered(
                lambda ack_id_job_build: (
                    ack_id_job_build[0],
                    gcs_client.get_started_finished(
                        ack_id_job_build[1], ack_id_job_build[2])),
                todo):
            if finished:
                if not db.insert_build(build_dir, started, finished):
                    print('build dir already present in db: ', build_dir)
                start = time.localtime(started.get('timestamp', 0) if started else 0)
                print((build_dir, bool(started), bool(finished),
                       time.strftime('%F %T %Z', start),
                       finished and finished.get('result')))
                build_dirs.append(build_dir)
                ack_ids.append(ack_id)
            else:
                print('finished.json missing?', build_dir, started, finished)
    finally:
        pool.close()
    db.commit()
    return ack_ids, build_dirs
def update_in_parallel(servers, options):
    """Update a group of servers in parallel.

    Exit the process with an error if any server failed to be updated and
    options.cont is not set.

    @param servers: A list of tuples of (server_name, server_status, roles).
    @param options: Options for the push.
    """
    args = []
    for server, status, _ in servers:
        args.append({'server': server, 'status': status, 'options': options})

    # The update actions run in parallel. If any update failed, we should wait
    # for the other running updates to finish. Aborting in the middle of an
    # update may leave the server in a bad state.
    pool = multiprocessing.pool.ThreadPool(POOL_SIZE)
    failed_servers = []
    results = pool.imap_unordered(update_server, args)
    for server, success, output in results:
        if options.dryrun:
            print('Dry run, updating server %s is skipped.' % server)
        elif success:
            print('Successfully updated server %s.' % server)
            if options.verbose:
                print(output)
                print()
        else:
            error = ('Failed to update server %s.\nError: %s' % (server, output))
            print(error)
            failed_servers.append(server)

    if failed_servers and not options.cont:
        print('Error! Failed to update following servers: %s' % failed_servers)
        sys.exit(1)
def create_indexed_database(direct_normal_csv, diffuse_csv):
    import csv
    import multiprocessing.pool
    storage = ABES.IndexedStorage(ABES.dbm_file, 'n')
    dirnorm = dict(extract_csv_data(direct_normal_csv))
    diffuse = dict(extract_csv_data(diffuse_csv))
    assert (len(dirnorm) == len(diffuse))
    pool = multiprocessing.pool.Pool()
    read_data = ((key, dirn, diffuse[key]) for key, dirn in dirnorm.items())
    i = 1
    for key, pdir, pdif in pool.imap_unordered(convert_to_power, read_data):
        storage[key] = (pdir, pdif)
        print(i, '/', len(dirnorm))
        print(key, ABES.key2coords(key))
        print(pdir)
        print(pdif)
        print('\n')
        i += 1
def main():
    if not os.path.exists('sigs'):
        os.mkdir('sigs')
    elif not os.path.isdir('sigs'):
        print('Please delete "sigs" before starting')
        sys.exit(1)

    tasks = []
    distr = 'ubuntu'
    # for version in os.listdir(distr):
    for version in ['bionic']:
        version = os.path.join(distr, version)
        for arch in os.listdir(version):
            arch = os.path.join(version, arch)
            for package in os.listdir(arch):
                package = os.path.join(arch, package)
                tasks.append(package)

    # We are going to do some hierarchical multiprocessing because there is a
    # very high pickle message-passing overhead, so a lot of CPU time gets
    # burned pickling in the main process simply passing work to worker processes.
    import subprocess
    import multiprocessing.pool
    pool = multiprocessing.pool.ThreadPool(cpu_factor)

    def do_package_in_worker(package):
        subprocess.call(['python3', __file__, '-c', package])

    for _ in pool.imap_unordered(do_package_in_worker, tasks):
        pass
def download_junit(db, threads, client_class):
    """Download junit results for builds without them."""
    builds_to_grab = db.get_builds_missing_junit()
    pool = None
    if threads > 1:
        pool = multiprocessing.pool.ThreadPool(
            threads, mp_init_worker, ('', {}, client_class, False))
        test_iterator = pool.imap_unordered(
            get_junits, builds_to_grab)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = client_class('', {})
        test_iterator = (
            get_junits(build_path) for build_path in builds_to_grab)
    for n, (build_id, build_path, junits) in enumerate(test_iterator, 1):
        print('%d/%d' % (n, len(builds_to_grab)),
              build_path, len(junits), len(''.join(junits.values())))
        junits = {k: remove_system_out(v) for k, v in junits.iteritems()}
        db.insert_build_junits(build_id, junits)
        if n % 100 == 0:
            db.commit()
    db.commit()
    if pool:
        pool.close()
        pool.join()
def validate_states(
    make_env: Callable[[], CompilerEnv],
    states: Iterable[CompilerEnvState],
    datasets: Optional[List[str]] = None,
    nproc: Optional[int] = None,
) -> Iterable[ValidationResult]:
    """A parallelized implementation of :func:`validate_state()
    <compiler_gym.validate_state>` for batched validation.

    :param make_env: A callback which instantiates a compiler environment.
    :param states: A sequence of compiler environment states to validate.
    :param datasets: An optional list of datasets that are required.
    :param nproc: The number of parallel worker processes to run.
    :return: An iterator over validation results. The order of results may
        differ from the input states.
    """
    env = make_env()
    try:
        if not isinstance(env, LlvmEnv):
            raise ValueError("Only LLVM environment is supported for validation.")

        # Ensure that the required datasets are available.
        env.require_datasets(datasets)
        reward_space_name: str = env.reward_space.id if env.reward_space else None
    finally:
        env.close()

    with multiprocessing.Pool(processes=nproc) as pool:
        yield from pool.imap_unordered(
            _validate_states_worker, [(reward_space_name, r) for r in states])
def _find(self, query, n_threads=None, progress=None):
    p_query = query.prepare(self._nlp)
    if len(p_query) == 0:
        return []

    find_in_doc = functools.partial(self._find_in_doc, c_query=p_query.compiled)
    docs = self.session.c_documents
    total = sum([x.n_tokens for x in docs])
    done = 0

    if n_threads is None:
        n_threads = min(len(docs), self._max_threads)

    results = None
    with multiprocessing.pool.ThreadPool(processes=n_threads) as pool:
        for doc, r in pool.imap_unordered(find_in_doc, docs):
            if results is None:
                results = r
            else:
                results.extend(r)
            done += doc.n_tokens
            if progress:
                progress(done / total)

    return [CoreMatch(self, p_query, m) for m in results.best_n(-1)]
def iter_bucket(bucket_name, prefix='', accept_key=lambda key: True,
                key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 files under `bucket/prefix`, yielding out
    `(key, key content)` 2-tuples (generator).

    `accept_key` is a function that accepts a key name (unicode string) and
    returns True/False, signalling whether the given key should be downloaded out
    or not (default: accept all keys).

    If `key_limit` is given, stop after yielding out that many results.

    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    _MULTIPROCESSING is False, this parameter will be ignored.

    Example::

      >>> # get all JSON files under "mybucket/foo/"
      >>> for key, content in iter_bucket(bucket_name, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
      ...     print key, len(content)

      >>> # limit to 10k files, using 32 parallel workers (default is 16)
      >>> for key, content in iter_bucket(bucket_name, key_limit=10000, workers=32):
      ...     print key, len(content)
    """
    #
    # If people insist on giving us bucket instances, silently extract the name
    # before moving on. Works for boto3 as well as boto.
    #
    try:
        bucket_name = bucket_name.name
    except AttributeError:
        pass

    total_size, key_no = 0, -1
    key_iterator = _list_bucket(bucket_name, prefix=prefix, accept_key=accept_key)
    download_key = functools.partial(_download_key, bucket_name=bucket_name, retries=retries)

    with _create_process_pool(processes=workers) as pool:
        result_iterator = pool.imap_unordered(download_key, key_iterator)
        for key_no, (key, content) in enumerate(result_iterator):
            if True or key_no % 1000 == 0:
                logger.info("yielding key #%i: %s, size %i (total %.1fMB)",
                            key_no, key, len(content), total_size / 1024.0 ** 2)
            yield key, content
            total_size += len(content)

            if key_limit is not None and key_no + 1 >= key_limit:
                # we were asked to output only a limited number of keys => we're done
                break
    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def main():
    # scraped_count = ScrapedPage.select(fn.Count(ScrapedPage.id)).where(ScrapedPage.batch == 2)
    df_websites = pd.read_excel('data/7_opensources_co/websites_with_results.xlsx')
    domains = [u for u in df_websites.url.values]

    domain_type = {}
    websites_url = df_websites.url.values
    websites_type = df_websites.type.values
    for i, url in enumerate(websites_url):
        domain_type[url] = websites_type[i]

    urls_domains_not_found = []
    with open('data/7_opensources_co/news_cleaned_postgres_missing_domains.csv',
              'w') as out_missing_domains:
        with tqdm() as progress:
            print('Cleaning')
            pages_parsed = []
            with multiprocessing.pool.Pool(processes=multiprocessing.cpu_count()) as pool:
                # for page in pool.imap_unordered(parse_article, fetch_pages(last_id, batch_size), chunksize=100):
                for page in pool.imap_unordered(parse_article, fetch_pages_jsonl(), chunksize=100):
                    if page is None:
                        continue

                    domain = None
                    for d in domains:
                        if d in page['url']:
                            domain = d
                    if domain is None:
                        urls_domains_not_found.append(page['url'])
                        out_missing_domains.write(page['url'] + '\n')
                        domain = urlsplit(page['url']).netloc

                    page['domain'] = domain
                    page['type'] = domain_type[page['domain']] if page['domain'] in domain_type else None
                    pages_parsed.append(page)
                    progress.update()

                    if len(pages_parsed) > 1000:
                        print('Inserting cleaned articles to DB')
                        with peewee_database.atomic():
                            Page.insert_many(pages_parsed).execute()
                        pages_parsed = []

    print('Inserting cleaned articles to DB')
    with peewee_database.atomic():
        Page.insert_many(pages_parsed).execute()

    print('Urls without our domains?!:', len(urls_domains_not_found))
def generate_items(keys: Iterable, factory: Callable[[Any], tuple], method: Callable) -> Iterator:
    """Generate (key, method(*factory(key))) tuples for each key.

    The first element returned by factory is an instance of the class to which
    method is attached.

    If a process pool has been initialized, use multiprocessing; otherwise, use
    serial processing.
    """
    if pool is None:
        return (generate_items_worker((k, factory(k), method)) for k in keys)
    return pool.imap_unordered(generate_items_worker,
                               ((k, factory(k), method) for k in keys))
def s3_iter_bucket(bucket, prefix="", accept_key=lambda key: True, key_limit=None, workers=16): """ Iterate and download all S3 files under `bucket/prefix`, yielding out `(key, key content)` 2-tuples (generator). `accept_key` is a function that accepts a key name (unicode string) and returns True/False, signalling whether the given key should be downloaded out or not (default: accept all keys). If `key_limit` is given, stop after yielding out that many results. The keys are processed in parallel, using `workers` processes (default: 16), to speed up downloads greatly. If multiprocessing is not available, thus NO_MULTIPROCESSING is True, this parameter will be ignored. Example:: >>> mybucket = boto.connect_s3().get_bucket('mybucket') >>> # get all JSON files under "mybucket/foo/" >>> for key, content in s3_iter_bucket(mybucket, prefix='foo/', accept_key=lambda key: key.endswith('.json')): ... print key, len(content) >>> # limit to 10k files, using 32 parallel workers (default is 16) >>> for key, content in s3_iter_bucket(mybucket, key_limit=10000, workers=32): ... print key, len(content) """ total_size, key_no = 0, -1 keys = (key for key in bucket.list(prefix=prefix) if accept_key(key.name)) if NO_MULTIPROCESSING: logger.info("iterating over keys from %s without multiprocessing" % bucket) iterator = imap(s3_iter_bucket_process_key, keys) else: logger.info("iterating over keys from %s with %i workers" % (bucket, workers)) pool = multiprocessing.pool.Pool(processes=workers) iterator = pool.imap_unordered(s3_iter_bucket_process_key, keys) for key_no, (key, content) in enumerate(iterator): if key_no % 1000 == 0: logger.info( "yielding key #%i: %s, size %i (total %.1fMB)" % (key_no, key, len(content), total_size / 1024.0 ** 2) ) yield key, content key.close() total_size += len(content) if key_limit is not None and key_no + 1 >= key_limit: # we were asked to output only a limited number of keys => we're done break if not NO_MULTIPROCESSING: pool.terminate() logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def progresspmap(pool: mp.pool.Pool, func: Callable[[X], Y], lst: List[X]) -> List[Y]:
    """ a parallel map with a progressbar. """
    i, maxi, result = 0, len(lst), []
    for item in pool.imap_unordered(func, lst):
        result.append(item)
        i += 1
        progress(i, maxi)
    return result
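# A minimal usage sketch for progresspmap above (hypothetical: the `slow_square`
# worker and the pool size are illustrative, and the module's `progress(i, maxi)`
# printer is assumed to exist). imap_unordered yields results in completion
# order, so the returned list is not aligned with the input order.
import multiprocessing as mp


def slow_square(x: int) -> int:
    # stand-in for an expensive, picklable worker function
    return x * x


if __name__ == "__main__":
    with mp.Pool(processes=4) as pool:
        squares = progresspmap(pool, slow_square, list(range(100)))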
def calculate_scores(final_dict, emodel_dirs, scores_db_filename,
                     use_ipyp=False, ipyp_profile=None):
    """Calculate scores of e-model morphology combinations and update the
    database accordingly.

    Args:
        scores_db_filename: path to .sqlite database with e-model morphology
            combinations
        final_dict: a dict mapping e-models to dicts with e-model parameters
        emodel_dirs: a dict mapping e-models to the directories with e-model
            input files
        use_ipyp: bool indicating whether ipyparallel is used. Default is False.
        ipyp_profile: path to ipyparallel profile. Default is None.
    """
    print('Creating argument list for parallelisation')
    arg_list = create_arg_list(scores_db_filename, emodel_dirs, final_dict)

    print('Parallelising score evaluation of %d me-combos' % len(arg_list))
    if use_ipyp:
        # use ipyparallel
        client = ipyparallel.Client(profile=ipyp_profile)
        lview = client.load_balanced_view()
        results = lview.imap(run_emodel_morph_isolated, arg_list, ordered=False)
    else:
        # use multiprocessing
        pool = NestedPool()
        results = pool.imap_unordered(run_emodel_morph_isolated, arg_list)

    # keep track of the number of received results
    uids_received = 0

    # every time a result comes in, save the score in the database
    for result in results:
        uid = result['uid']
        scores = result['scores']
        extra_values = result['extra_values']
        exception = result['exception']
        uids_received += 1

        save_scores(scores_db_filename, uid, scores, extra_values, exception)

        print('Saved scores for uid %s (%d out of %d) %s' %
              (uid, uids_received, len(arg_list),
               'with exception' if exception else ''))
        sys.stdout.flush()

    print('Converting score json strings to scores values ...')
    expand_scores_to_score_values_table(scores_db_filename)
def s3_iter_bucket(bucket, prefix='', accept_key=lambda key: True,
                   key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 files under `bucket/prefix`, yielding out
    `(key, key content)` 2-tuples (generator).

    `accept_key` is a function that accepts a key name (unicode string) and
    returns True/False, signalling whether the given key should be downloaded out
    or not (default: accept all keys).

    If `key_limit` is given, stop after yielding out that many results.

    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    MULTIPROCESSING is False, this parameter will be ignored.

    Example::

      >>> mybucket = boto.connect_s3().get_bucket('mybucket')

      >>> # get all JSON files under "mybucket/foo/"
      >>> for key, content in s3_iter_bucket(mybucket, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
      ...     print key, len(content)

      >>> # limit to 10k files, using 32 parallel workers (default is 16)
      >>> for key, content in s3_iter_bucket(mybucket, key_limit=10000, workers=32):
      ...     print key, len(content)
    """
    total_size, key_no = 0, -1
    keys = ({'key': key, 'retries': retries}
            for key in bucket.list(prefix=prefix) if accept_key(key.name))

    if MULTIPROCESSING:
        logger.info("iterating over keys from %s with %i workers" % (bucket, workers))
        pool = multiprocessing.pool.Pool(processes=workers)
        iterator = pool.imap_unordered(s3_iter_bucket_process_key_with_kwargs, keys)
    else:
        logger.info("iterating over keys from %s without multiprocessing" % bucket)
        iterator = imap(s3_iter_bucket_process_key_with_kwargs, keys)

    for key_no, (key, content) in enumerate(iterator):
        if key_no % 1000 == 0:
            logger.info("yielding key #%i: %s, size %i (total %.1fMB)" %
                        (key_no, key, len(content), total_size / 1024.0 ** 2))

        yield key, content
        key.close()
        total_size += len(content)

        if key_limit is not None and key_no + 1 >= key_limit:
            # we were asked to output only a limited number of keys => we're done
            break

    if MULTIPROCESSING:
        pool.terminate()

    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def main():
    import os
    if not os.path.isdir(const_path):
        os.mkdir(const_path)
    equipes = range(3, 11)
    pool = multiprocessing.pool.Pool()
    resultat = pool.imap_unordered(compute_lowest_duration, equipes)
    for ne, nj, txt in resultat:
        print(ne, "équipes :", nj, "jours", txt)
def eval_genomes_par(genomes, config):
    pool = Pool(4)
    genomes_list = [genome for ignored_genome_id, genome in genomes]
    for id, fitness in pool.imap_unordered(
            partial(eval_genomes, config=config, genomes_list=genomes_list),
            range(len(genomes_list))):
        genomes_list[id].fitness = fitness
    pool.close()  # should this be terminate?
    pool.join()
    for id, g in enumerate(genomes_list):
        print(id, g.fitness)
def parfor(
    f: Callable[[param_t], return_t],
    arg_list: Sequence[param_t],
    *,
    callback: Optional[
        Callable[[Union[return_t, Captured_Exception[param_t, return_t]]], None]
    ] = None,
    print_time: bool = False,
    task_name: Optional[str] = None,
) -> List[Union[return_t, Captured_Exception[param_t, return_t]]]:
    def timedelta2str(T: datetime.timedelta) -> str:
        s = str(T)
        return s[: s.rfind(".")]

    if pool is None:
        launch_parpool()
    assert pool is not None

    helper_arg_list = ((idx, f, arg) for idx, arg in enumerate(arg_list))
    result_dict: Dict[int, Union[return_t, Captured_Exception[param_t, return_t]]] = {}
    num_total = len(arg_list)
    num_finished = 0
    time_start = datetime.datetime.now()
    helper = cast(
        Callable[
            [Tuple[int, Callable[[param_t], return_t], param_t]],
            Tuple[int, Union[return_t, Captured_Exception[param_t, return_t]]],
        ],
        parfor_helper,
    )
    for idx, result in pool.imap_unordered(helper, helper_arg_list):
        num_finished += 1
        time_now = datetime.datetime.now()
        time_elapsed = time_now - time_start
        time_need = ((num_total - num_finished) / num_finished) * time_elapsed
        if isinstance(result, Captured_Exception):
            print_without_line_feed("[{}]: {}\n".format(idx, result))
        if print_time:
            assert task_name is not None
            print_without_line_feed(
                "{}\n\tfinished {}/{}, {:05.2f}%, elapsed {}, remaining {}, ETA {:%Y-%m-%d %H:%M:%S}\n".format(  # noqa: E501
                    task_name,
                    num_finished,
                    num_total,
                    100 * num_finished / num_total,
                    timedelta2str(time_elapsed),
                    timedelta2str(time_need),
                    time_now + time_need,
                )
            )
        if callback is not None:
            callback(result)
        result_dict[idx] = result

    result_list = [result_dict[idx] for idx in range(len(arg_list))]
    return result_list
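# A minimal usage sketch for parfor above, assuming its module-level helpers
# (launch_parpool, parfor_helper, Captured_Exception) are importable and the
# hypothetical `work` function is picklable. Worker exceptions come back as
# Captured_Exception entries instead of propagating, and results are re-sorted
# by index even though imap_unordered yields completions out of order.
def work(x: int) -> int:
    return x * x


if __name__ == "__main__":
    results = parfor(work, list(range(100)))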
def _map_multithread(func, iterable, chunksize=1):
    # type: (Callable[[S], T], Iterable[S], int) -> Iterator[T]
    """Chop iterable into chunks and submit them to a thread pool.

    For very long iterables using a large value for chunksize can make
    the job complete much faster than using the default value of 1.

    Return an unordered iterator of the results.
    """
    with closing(ThreadPool(DEFAULT_POOLSIZE)) as pool:
        return pool.imap_unordered(func, iterable, chunksize)
def _map_multiprocess(func: Callable[[S], T], iterable: Iterable[S],
                      chunksize: int = 1) -> Iterator[T]:
    """Chop iterable into chunks and submit them to a process pool.

    For very long iterables using a large value for chunksize can make
    the job complete much faster than using the default value of 1.

    Return an unordered iterator of the results.
    """
    with closing(ProcessPool()) as pool:
        return pool.imap_unordered(func, iterable, chunksize)
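# A minimal usage sketch for the two helpers above (hypothetical `cube` worker;
# _map_multithread/_map_multiprocess are assumed importable). The process-pool
# variant pickles both the function and each item, so the worker must be a
# module-level function.
def cube(x: int) -> int:
    return x * x * x


if __name__ == "__main__":
    # chunksize > 1 amortizes inter-process communication for long iterables;
    # results arrive in completion order, not input order.
    for value in _map_multiprocess(cube, range(10_000), chunksize=64):
        pass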
def loader(df):
    if not os.path.exists(OUT_DIR):
        os.mkdir(OUT_DIR)
    key_url_list = parse_data(df)
    pool = multiprocessing.pool.ThreadPool(processes=NUM_WORKERS)
    failures = sum(pool.imap_unordered(Downloader(), key_url_list))
    print('Total number of download failures: %s out of %s' % (failures, len(key_url_list)))
    pool.close()
    pool.terminate()
def MapNativeProcessingBinaries(
    binaries: typing.List[str],
    input_protos: typing.List[pbutil.ProtocolBuffer],
    output_proto_classes: typing.List[typing.Type],
    pool: typing.Optional[multiprocessing.Pool] = None,
    num_processes: typing.Optional[int] = None,
) -> typing.Iterator[_MapWorker]:
    """Run a protocol buffer processing binary over a set of inputs.

    Args:
        binaries: The paths of the binaries to execute, as provided to
            bazelutil.DataPath().
        input_protos: An iterable list of input protos.
        output_proto_classes: The proto classes of the outputs.
        pool: The multiprocessing pool to use.
        num_processes: The number of processes for the multiprocessing pool.

    Returns:
        A generator of _MapWorker instances. The order is random.
    """
    if not len(binaries) == len(input_protos):
        raise ValueError("Number of binaries does not equal protos")

    cmds = [[bazelutil.DataPath(b)] for b in binaries]

    # Read all inputs to a list. We need the inputs in a list so that we can
    # map an input's position in the list to a _MapWorker.id.
    input_protos = list(input_protos)
    output_proto_classes = list(output_proto_classes)

    # Create the multiprocessing pool to use, if not provided.
    pool = pool or multiprocessing.Pool(processes=num_processes)

    map_worker_iterator = (
        _MapWorker(id, cmd, input_proto)
        for id, (cmd, input_proto) in enumerate(zip(cmds, input_protos)))

    for map_worker in pool.imap_unordered(_RunNativeProtoProcessingWorker,
                                          map_worker_iterator):
        map_worker.SetProtos(input_protos[map_worker.id],
                             output_proto_classes[map_worker.id])
        yield map_worker
def parallel_perft(pool: multiprocessing.pool.Pool, depth: int, board: chess.Board) -> int:
    if depth == 1:
        return board.legal_moves.count()
    elif depth > 1:
        def successors(board: chess.Board) -> Iterator[chess.Board]:
            for move in board.legal_moves:
                board_after = board.copy(stack=False)
                board_after.push(move)
                yield board_after

        return sum(pool.imap_unordered(functools.partial(perft, depth - 1), successors(board)))
    else:
        return 1
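# A short usage sketch, assuming the serial `perft` helper that parallel_perft
# dispatches to is defined in the same module. perft(4) from the initial
# position should count 197281 nodes.
if __name__ == "__main__":
    with multiprocessing.Pool() as pool:
        print(parallel_perft(pool, 4, chess.Board()))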
def peek_total_size(urls: Iterator[str]) -> Tuple[int, int]:
    with multiprocessing.pool.ThreadPool(processes=16) as pool:
        it = pool.imap_unordered(peek_content_length, urls)
        pool.close()
        pool.join()

    total_size = 0
    unknown_files = 0
    for size in it:
        if size is None:
            unknown_files += 1
        else:
            total_size += size

    return total_size, unknown_files
def AnalyzeAllSongs(music_directory):
    song_list = []
    file_list = os.listdir(music_directory)
    for file in file_list:
        if file.endswith(".mp3"):
            song_name = file[0:len(file) - 4]
            song_list.append(song_name)

    if __name__ == '__main__':
        num_processes = 8
        pool = MyPool(num_processes)
        song_list = pool.imap_unordered(AnalyzeSong, song_list)

    return song_list
def get_all_builds(db, jobs_dir, metadata, threads, client_class, build_limit):
    """
    Adds information about tests to a dictionary.

    Args:
        jobs_dir: the GCS path containing jobs.
        metadata: a dict of metadata about the jobs_dir.
        threads: how many threads to use to download build information.
        client_class: a constructor for a GCSClient (or a subclass).
    """
    gcs = client_class(jobs_dir, metadata)

    print(f'Loading builds from {jobs_dir}')
    sys.stdout.flush()

    builds_have = db.get_existing_builds(jobs_dir)
    print(f'already have {len(builds_have)} builds')
    sys.stdout.flush()

    jobs_and_builds = gcs.get_builds(builds_have, build_limit)
    pool = None
    if threads > 1:
        pool = multiprocessing.Pool(threads, mp_init_worker,
                                    (jobs_dir, metadata, client_class))
        builds_iterator = pool.imap_unordered(get_started_finished, jobs_and_builds)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = gcs
        builds_iterator = (get_started_finished(job_build) for job_build in jobs_and_builds)

    try:
        for n, (build_dir, started, finished) in enumerate(builds_iterator):
            if not build_dir:
                continue  # skip builds that raised exceptions
            print(f'inserting build: {build_dir}')
            if started or finished:
                db.insert_build(build_dir, started, finished)
            if n % 200 == 0:
                db.commit()
    except KeyboardInterrupt:
        if pool:
            pool.terminate()
        raise
    else:
        if pool:
            pool.close()
            pool.join()
        db.commit()
def main():
    pool = multiprocessing.pool.ThreadPool(args.parallelism)
    bots = []
    for id, botname in enumerate(args.players):
        bots.append((id, util.load_player(botname)))
    wins = [0] * len(bots)

    games = list(itertools.combinations(bots, 2))
    random.shuffle(games)
    matches = len(games) * args.matches * len(args.planets)
    rounds = matches * args.rounds
    log("{} Bots, {} Maps, {} Games, {} Matches, {} Rounds, 1 victor".format(
        len(bots), len(args.planets), len(games), matches, rounds))

    scores = lambda: sorted(zip(wins, args.players), key=lambda x: x[0], reverse=True)

    try:
        i = 0
        for ret in pool.imap_unordered(execute, gen_rounds(games)):
            i += 1
            (gid, mid, rid), winner, (pid1, pid2), (map_size, seed) = ret
            if winner is None:
                result = "DRAW"
            else:
                result = args.players[winner]
                wins[winner] += 1
            log("({}:{}:{} | {}:{} | {}:{}): {}".format(
                gid, mid, rid, map_size, seed, pid1, pid2, result), lvl=2)
            if i % NOTIFY_AMOUNT == 0:
                log("Finished {}/{} rounds ({:.2f})%. Current top 3: {}".format(
                    i, rounds, (float(i) / rounds * 100), scores()[:3]))
    except KeyboardInterrupt:
        log("Tournament interrupted by user", type="FAIL")
        pool.terminate()
        pool.join()
        sys.exit(1)

    pool.close()
    pool.join()
    log("All games finished", type="SUCCESS")

    for i, (wins, bot) in enumerate(scores()):
        log("{:3}. {:20} ({})".format(i, bot, wins))
def computeSamples(self, work, report_interval=100):
    '''compute samples according to work.

    returns a list of results.
    '''
    n = len(work)
    E.debug('sampling will work on %i items' % n)

    results = []

    if self.num_threads == 0:
        for i, w in enumerate(work):
            r = computeSample(
                (w, self.samples_outfile, self.outfile_sample_metrics, None))
            if i % report_interval == 0:
                E.info("%i/%i done (%5.2f)" % (i, n, 100.0 * i / n))
            results.append(r)
    else:
        E.info("generating processpool with %i threads for %i items" %
               (self.num_threads, len(work)))

        manager = multiprocessing.Manager()
        lock = manager.Lock()
        pool = multiprocessing.Pool(self.num_threads)

        # use file names - not files when multiprocessing
        samples_outfile, metrics_outfile = None, None
        if self.samples_outfile:
            samples_outfile = self.samples_outfile.name
            self.samples_outfile.flush()
        if self.outfile_sample_metrics:
            metrics_outfile = self.outfile_sample_metrics.name
            self.outfile_sample_metrics.flush()

        ww = [(w, samples_outfile, metrics_outfile, lock) for w in work]

        for i, r in enumerate(pool.imap_unordered(computeSample, ww)):
            if i % report_interval == 0:
                E.info("%i/%i done (%5.2f)" % (i, n, 100.0 * i / n))
            results.append(r)

        pool.close()
        pool.join()

    return results
def main():
    pool = multiprocessing.Pool(processes=args.parallelism)
    bots = []
    for id, botname in enumerate(args.players):
        bots.append(util.load_player(botname))

    matches = len(bots) * args.matches * len(args.planets)
    log("Training against {} Bots, {} Maps, {} Matches".format(
        len(bots), len(args.planets), matches))

    data, target = [], []
    try:
        i = 0
        for ret in pool.imap_unordered(execute, gen_rounds(bots)):
            i += 1
            (bid, mid), winner, state_vectors, (map_size, seed) = ret
            if winner == 1:
                result = 'won'
            elif winner == 2:
                result = 'lost'
            else:
                result = 'draw'
            data += state_vectors
            target += [result] * len(state_vectors)
            log("({}:{} | {}:{}): {}".format(bid, mid, map_size, seed, result), lvl=1)
            if i % NOTIFY_AMOUNT == 0:
                log("Finished {}/{} matches ({:.2f})%.".format(
                    i, matches, (float(i) / matches * 100)))
    except KeyboardInterrupt:
        log("Tournament interrupted by user", type="FAIL")
        pool.terminate()
        pool.join()
        sys.exit(1)

    pool.close()
    pool.join()
    log("All games finished", type="SUCCESS")

    generate_model(data, target)
def get_builds(db, jobs_dir, metadata, threads, client_class):
    """
    Adds information about tests to a dictionary.

    Args:
        jobs_dir: the GCS path containing jobs.
        metadata: a dict of metadata about the jobs_dir.
        threads: how many threads to use to download build information.
        client_class: a constructor for a GCSClient (or a subclass).
    """
    gcs = client_class(jobs_dir, metadata)

    print('Loading builds from %s' % jobs_dir)
    sys.stdout.flush()

    builds_have = db.get_existing_builds(jobs_dir)
    print('already have %d builds' % len(builds_have))
    sys.stdout.flush()

    jobs_and_builds = gcs.get_builds(builds_have)
    pool = None
    if threads > 1:
        pool = multiprocessing.Pool(threads, mp_init_worker,
                                    (jobs_dir, metadata, client_class))
        builds_iterator = pool.imap_unordered(
            get_started_finished, jobs_and_builds)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = gcs
        builds_iterator = (
            get_started_finished(job_build) for job_build in jobs_and_builds)

    try:
        for n, (build_dir, started, finished) in enumerate(builds_iterator):
            print(build_dir)
            if started or finished:
                db.insert_build(build_dir, started, finished)
            if n % 200 == 0:
                db.commit()
    except KeyboardInterrupt:
        if pool:
            pool.terminate()
        raise
    else:
        if pool:
            pool.close()
            pool.join()
        db.commit()
def get_started_finished(gcs_client, db, todo):
    """Download started/finished.json from build dirs in todo."""
    acks = []
    build_dirs = []
    pool = multiprocessing.pool.ThreadPool(16)
    try:
        # Note: the tuple-parameter lambda below is Python 2-only syntax.
        for ack_id, (build_dir, started, finished) in pool.imap_unordered(
                lambda (ack_id, job, build): (
                    ack_id, gcs_client.get_started_finished(job, build)),
                todo):
            if finished:
                if not db.insert_build(build_dir, started, finished):
                    print('already present??')
                start = time.localtime(started.get('timestamp', 0) if started else 0)
                print(build_dir, bool(started), bool(finished),
                      time.strftime('%F %T %Z', start),
                      finished and finished.get('result'))
                build_dirs.append(build_dir)
                acks.append(ack_id)
            else:
                print('finished.json missing?', build_dir, started, finished)
    finally:
        pool.close()
    db.commit()
    return acks, build_dirs
def run_iptestall(options):
    """Run the entire IPython test suite by calling nose and trial.

    This function constructs :class:`IPTester` instances for all IPython
    modules and package and then runs each of them.  This causes the modules
    and packages of IPython to be tested each in their own subprocess using
    nose.

    Parameters
    ----------

    All parameters are passed as attributes of the options object.

    testgroups : list of str
      Run only these sections of the test suite. If empty, run all the available
      sections.

    fast : int or None
      Run the test suite in parallel, using n simultaneous processes. If None
      is passed, one process is used per CPU core. Default 1 (i.e. sequential)

    inc_slow : bool
      Include slow tests, like IPython.parallel. By default, these tests aren't
      run.

    xunit : bool
      Produce Xunit XML output. This is written to multiple foo.xunit.xml files.

    coverage : bool or str
      Measure code coverage from tests. True will store the raw coverage data,
      or pass 'html' or 'xml' to get reports.
    """
    if options.fast != 1:
        # If running in parallel, capture output so it doesn't get interleaved
        TestController.buffer_output = True

    if options.testgroups:
        to_run = [PyTestController(name) for name in options.testgroups]
        not_run = []
    else:
        to_run, not_run = prepare_py_test_controllers(inc_slow=options.all)

    configure_controllers(to_run, xunit=options.xunit, coverage=options.coverage)

    def justify(ltext, rtext, width=70, fill="-"):
        ltext += " "
        rtext = (" " + rtext).rjust(width - len(ltext), fill)
        return ltext + rtext

    # Run all test runners, tracking execution time
    failed = []
    t_start = time.time()

    print()
    if options.fast == 1:
        # This actually means sequential, i.e. with 1 job
        for controller in to_run:
            print("IPython test group:", controller.section)
            controller, res = do_run(controller)
            if res:
                failed.append(controller)
                if res == -signal.SIGINT:
                    print("Interrupted")
                    break
            print()
    else:
        # Run tests concurrently
        try:
            pool = multiprocessing.pool.ThreadPool(options.fast)
            for (controller, res) in pool.imap_unordered(do_run, to_run):
                res_string = "OK" if res == 0 else "FAILED"
                print(justify("IPython test group: " + controller.section, res_string))
                if res:
                    print(bytes_to_str(controller.stdout))
                    failed.append(controller)
                    if res == -signal.SIGINT:
                        print("Interrupted")
                        break
        except KeyboardInterrupt:
            return

    for controller in not_run:
        print(justify("IPython test group: " + controller.section, "NOT RUN"))

    t_end = time.time()
    t_tests = t_end - t_start
    nrunners = len(to_run)
    nfail = len(failed)
    # summarize results
    print("_" * 70)
    print("Test suite completed for system with the following information:")
    print(report())
    print("Ran %s test groups in %.3fs" % (nrunners, t_tests))
    print()
    print("Status: ", end="")
    if not failed:
        print("OK")
    else:
        # If anything went wrong, point out what command to rerun manually to
        # see the actual errors and individual summary
        failed_sections = [c.section for c in failed]
        print("ERROR - {} out of {} test groups failed ({}).".format(
            nfail, nrunners, ", ".join(failed_sections)))
        print()
        print("You may wish to rerun these, with:")
        print("  iptest", *failed_sections)
        print()

    if options.coverage:
        from coverage import coverage
        cov = coverage(data_file=".coverage")
        cov.combine()
        cov.save()

        # Coverage HTML report
        if options.coverage == "html":
            html_dir = "ipy_htmlcov"
            shutil.rmtree(html_dir, ignore_errors=True)
            print("Writing HTML coverage report to %s/ ... " % html_dir, end="")
            sys.stdout.flush()

            # Custom HTML reporter to clean up module names.
            from coverage.html import HtmlReporter

            class CustomHtmlReporter(HtmlReporter):
                def find_code_units(self, morfs):
                    super(CustomHtmlReporter, self).find_code_units(morfs)
                    for cu in self.code_units:
                        nameparts = cu.name.split(os.sep)
                        if "IPython" not in nameparts:
                            continue
                        ix = nameparts.index("IPython")
                        cu.name = ".".join(nameparts[ix:])

            # Reimplement the html_report method with our custom reporter
            cov._harvest_data()
            cov.config.from_args(omit="*%stests" % os.sep, html_dir=html_dir,
                                 html_title="IPython test coverage")
            reporter = CustomHtmlReporter(cov, cov.config)
            reporter.report(None)
            print("done.")

        # Coverage XML report
        elif options.coverage == "xml":
            cov.xml_report(outfile="ipy_coverage.xml")

    if failed:
        # Ensure that our exit code indicates failure
        sys.exit(1)
def run_iptestall(options):
    """Run the entire IPython test suite by calling nose and trial.

    This function constructs :class:`IPTester` instances for all IPython
    modules and package and then runs each of them.  This causes the modules
    and packages of IPython to be tested each in their own subprocess using
    nose.

    Parameters
    ----------

    All parameters are passed as attributes of the options object.

    testgroups : list of str
      Run only these sections of the test suite. If empty, run all the available
      sections.

    fast : int or None
      Run the test suite in parallel, using n simultaneous processes. If None
      is passed, one process is used per CPU core. Default 1 (i.e. sequential)

    inc_slow : bool
      Include slow tests, like IPython.parallel. By default, these tests aren't
      run.

    xunit : bool
      Produce Xunit XML output. This is written to multiple foo.xunit.xml files.

    coverage : bool or str
      Measure code coverage from tests. True will store the raw coverage data,
      or pass 'html' or 'xml' to get reports.

    extra_args : list
      Extra arguments to pass to the test subprocesses, e.g. '-v'
    """
    if options.fast != 1:
        # If running in parallel, capture output so it doesn't get interleaved
        TestController.buffer_output = True

    to_run, not_run = prepare_controllers(options)

    def justify(ltext, rtext, width=70, fill='-'):
        ltext += ' '
        rtext = (' ' + rtext).rjust(width - len(ltext), fill)
        return ltext + rtext

    # Run all test runners, tracking execution time
    failed = []
    t_start = time.time()

    print()
    if options.fast == 1:
        # This actually means sequential, i.e. with 1 job
        for controller in to_run:
            print('IPython test group:', controller.section)
            sys.stdout.flush()  # Show in correct order when output is piped
            controller, res = do_run(controller)
            if res:
                failed.append(controller)
                if res == -signal.SIGINT:
                    print("Interrupted")
                    break
            print()
    else:
        # Run tests concurrently
        try:
            pool = multiprocessing.pool.ThreadPool(options.fast)
            for (controller, res) in pool.imap_unordered(do_run, to_run):
                res_string = 'OK' if res == 0 else 'FAILED'
                print(justify('IPython test group: ' + controller.section, res_string))
                if res:
                    print(bytes_to_str(controller.stdout))
                    failed.append(controller)
                    if res == -signal.SIGINT:
                        print("Interrupted")
                        break
        except KeyboardInterrupt:
            return

    for controller in not_run:
        print(justify('IPython test group: ' + controller.section, 'NOT RUN'))

    t_end = time.time()
    t_tests = t_end - t_start
    nrunners = len(to_run)
    nfail = len(failed)
    # summarize results
    print('_'*70)
    print('Test suite completed for system with the following information:')
    print(report())
    took = "Took %.3fs." % t_tests
    print('Status: ', end='')
    if not failed:
        print('OK (%d test groups).' % nrunners, took)
    else:
        # If anything went wrong, point out what command to rerun manually to
        # see the actual errors and individual summary
        failed_sections = [c.section for c in failed]
        print('ERROR - {} out of {} test groups failed ({}).'.format(
            nfail, nrunners, ', '.join(failed_sections)), took)
        print()
        print('You may wish to rerun these, with:')
        print('  iptest', *failed_sections)
        print()

    if options.coverage:
        from coverage import coverage
        cov = coverage(data_file='.coverage')
        cov.combine()
        cov.save()

        # Coverage HTML report
        if options.coverage == 'html':
            html_dir = 'ipy_htmlcov'
            shutil.rmtree(html_dir, ignore_errors=True)
            print("Writing HTML coverage report to %s/ ... " % html_dir, end="")
            sys.stdout.flush()

            # Custom HTML reporter to clean up module names.
            from coverage.html import HtmlReporter

            class CustomHtmlReporter(HtmlReporter):
                def find_code_units(self, morfs):
                    super(CustomHtmlReporter, self).find_code_units(morfs)
                    for cu in self.code_units:
                        nameparts = cu.name.split(os.sep)
                        if 'IPython' not in nameparts:
                            continue
                        ix = nameparts.index('IPython')
                        cu.name = '.'.join(nameparts[ix:])

            # Reimplement the html_report method with our custom reporter
            cov._harvest_data()
            cov.config.from_args(omit='*{0}tests{0}*'.format(os.sep),
                                 html_dir=html_dir,
                                 html_title='IPython test coverage',
                                 )
            reporter = CustomHtmlReporter(cov, cov.config)
            reporter.report(None)
            print('done.')

        # Coverage XML report
        elif options.coverage == 'xml':
            cov.xml_report(outfile='ipy_coverage.xml')

    if failed:
        # Ensure that our exit code indicates failure
        sys.exit(1)
def generate_script(all_patches):
    """Resolve dependencies, and afterwards check if everything applies properly."""
    depends = sorted([i for i, patch in all_patches.iteritems() if not patch.disabled])
    resolved = resolve_dependencies(all_patches, depends=depends)
    max_patches = max(resolved) + 1

    # Generate timestamps based on dependencies, still required for binary patches
    # Find out which files are modified by multiple patches
    modified_files = {}
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        patch.verify_time = [0] * max_patches
        patch.verify_time[i] += 1
        for j in patch.depends:
            patch.verify_time = causal_time_combine(patch.verify_time, all_patches[j].verify_time)

        for f in patch.modified_files:
            if f not in modified_files:
                modified_files[f] = []
            modified_files[f].append(i)

    # Check dependencies
    dependency_cache = _load_dict(config.path_cache)
    pool = multiprocessing.pool.ThreadPool(processes=4)
    try:
        for filename, indices in modified_files.iteritems():

            # If one of patches is a binary patch, then we cannot / won't verify it - require dependencies in this case
            if contains_binary_patch(all_patches, indices, filename):
                if not causal_time_relation_any(all_patches, indices):
                    raise PatchUpdaterError("Because of binary patch modifying file %s the following patches need explicit dependencies: %s" %
                                            (filename, ", ".join([all_patches[i].name for i in indices])))
                continue

            original_content = get_wine_file(filename)
            original_hash = _sha256(original_content)
            selected_patches = select_patches(all_patches, indices, filename)

            # Generate a unique id based on the original content, the selected patches
            # and the dependency information. Since this information only has to be compared
            # we can throw it into a single hash.
            m = hashlib.sha256()
            m.update(original_hash)
            for i in indices:
                m.update("P%s" % selected_patches[i][0])
                for j in indices:
                    if causal_time_smaller(all_patches[j].verify_time, all_patches[i].verify_time):
                        m.update("D%s" % selected_patches[j][0])
            unique_hash = m.digest()

            # Skip checks if it matches the information from the cache
            try:
                if dependency_cache[filename] == unique_hash:
                    continue
            except KeyError:
                pass

            # Show a progress bar while applying the patches - this task might take some time
            chunk_size = 20
            with progressbar.ProgressBar(desc=filename, total=2 ** len(indices) / chunk_size) as progress:

                def test_apply(current):
                    set_apply = [(i, all_patches[i]) for i in current]
                    set_skip = [(i, all_patches[i]) for i in indices if i not in current]

                    # Check if there is any patch2 which depends directly or indirectly on patch1.
                    # If this is the case we found an impossible situation, we can be skipped in this test.
                    for i, patch1 in set_apply:
                        for j, patch2 in set_skip:
                            if causal_time_smaller(patch2.verify_time, patch1.verify_time):
                                return None  # we can skip this test

                    try:
                        original = original_content
                        for i, _ in set_apply:
                            original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                    except patchutils.PatchApplyError:
                        return current

                    return None  # everything is fine

                def test_apply_seq(current_list):
                    for current in current_list:
                        failed = test_apply(current)
                        if failed is not None:
                            return failed
                    return None

                iterables = []
                for i in xrange(0, len(indices) + 1):
                    iterables.append(itertools.combinations(indices, i))
                it = _split_seq(itertools.chain(*iterables), chunk_size)
                for k, failed in enumerate(pool.imap_unordered(test_apply_seq, it)):
                    if failed is not None:
                        progress.finish("<failed to apply>")
                        raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in failed])))
                    progress.update(k)

            # Update the dependency cache
            dependency_cache[filename] = unique_hash

    finally:
        pool.close()
        _save_dict(config.path_cache, dependency_cache)

    # Generate code for helper functions
    lines = []
    lines.append("# Enable or disable all patchsets\n")
    lines.append("patch_enable_all ()\n")
    lines.append("{\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        if patch.is_category:
            continue
        patch.variable = "enable_%s" % patch.name.replace("-", "_").replace(".", "_")
        lines.append("\t%s=\"$1\"\n" % patch.variable)
    lines.append("}\n")
    lines.append("\n")
    lines.append("# Enable or disable all categories\n")
    lines.append("category_enable_all ()\n")
    lines.append("{\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        if not patch.is_category:
            continue
        patch.variable = "enable_%s" % patch.name.replace("-", "_").replace(".", "_")
        lines.append("\t%s=\"$1\"\n" % patch.variable)
    lines.append("}\n")
    lines.append("\n")
    lines.append("# Enable or disable a specific patchset/category\n")
    lines.append("patch_enable ()\n")
    lines.append("{\n")
    lines.append("\tcase \"$1\" in\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        lines.append("\t\t%s)\n" % patch.name)
        lines.append("\t\t\t%s=\"$2\"\n" % patch.variable)
        lines.append("\t\t\t;;\n")
    lines.append("\t\t*)\n")
    lines.append("\t\t\treturn 1\n")
    lines.append("\t\t\t;;\n")
    lines.append("\tesac\n")
    lines.append("\treturn 0\n")
    lines.append("}\n")
    lines_helpers = lines

    # Generate code for dependency resolver
    lines = []
    for i, patch in [(i, all_patches[i]) for i in reversed(resolved)]:
        if len(patch.depends):
            lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable)
            for j in sorted(patch.depends):
                lines.append("\tif test \"$%s\" -gt 1; then\n" % all_patches[j].variable)
                lines.append("\t\tabort \"Patchset %s disabled, but %s depends on that.\"\n" %
                             (all_patches[j].name, patch.name))
                lines.append("\tfi\n")
            for j in sorted(patch.depends):
                lines.append("\t%s=1\n" % all_patches[j].variable)
            lines.append("fi\n\n")
    lines_resolver = lines

    # Generate code for applying all patchsets
    lines = []
    for i, patch in [(i, all_patches[i]) for i in resolved]:

        # Categories do not have any files associated, so just skip over
        if len(patch.files) == 0:
            continue

        lines.append("# Patchset %s\n" % patch.name)
        lines.append("# |\n")

        # List all bugs fixed by this patchset
        if any([bugid is not None for bugid, bugname in patch.fixes]):
            lines.append("# | This patchset fixes the following Wine bugs:\n")
            for bugid, bugname in patch.fixes:
                if bugid is not None:
lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap("[#%d] %s" % (bugid, bugname), 120))) lines.append("# |\n") # List all modified files lines.append("# | Modified files:\n") lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap(", ".join(sorted(patch.modified_files)), 120))) lines.append("# |\n") lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable) for f in patch.files: lines.append("\tpatch_apply %s\n" % os.path.join(patch.name, f)) if len(patch.patches): lines.append("\t(\n") for p in _unique(patch.patches, key=lambda p: (p.patch_author, p.patch_subject, p.patch_revision)): lines.append("\t\techo '+ { \"%s\", \"%s\", %d },';\n" % (_escape(p.patch_author), _escape(p.patch_subject), p.patch_revision)) lines.append("\t) >> \"$patchlist\"\n") lines.append("fi\n\n") lines_apply = lines with open(config.path_template_script) as template_fp: template = template_fp.read() with open(config.path_script, "w") as fp: fp.write(template.format(latest_staging_version=_latest_staging_version(), latest_wine_commit=latest_wine_commit, patch_helpers="".join(lines_helpers).rstrip("\n"), patch_resolver="".join(lines_resolver).rstrip("\n"), patch_apply="".join(lines_apply).rstrip("\n"))) # Add changes to git subprocess.call(["git", "add", config.path_script])
def iter_bucket(bucket_name, prefix='', accept_key=None,
                key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 objects under `s3://bucket_name/prefix`.

    Parameters
    ----------
    bucket_name: str
        The name of the bucket.
    prefix: str, optional
        Limits the iteration to keys starting with the prefix.
    accept_key: callable, optional
        This is a function that accepts a key name (unicode string)
        and returns True/False, signalling whether the given key should be downloaded.
        The default behavior is to accept all keys.
    key_limit: int, optional
        If specified, the iterator will stop after yielding this many results.
    workers: int, optional
        The number of subprocesses to use.
    retries: int, optional
        The number of times to retry a failed download.

    Yields
    ------
    str
        The full key name (does not include the bucket name).
    bytes
        The full contents of the key.

    Notes
    -----
    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    _MULTIPROCESSING is False, this parameter will be ignored.

    Examples
    --------
    >>> # get all JSON files under "mybucket/foo/"
    >>> for key, content in iter_bucket(bucket_name, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
    ...     print key, len(content)

    >>> # limit to 10k files, using 32 parallel workers (default is 16)
    >>> for key, content in iter_bucket(bucket_name, key_limit=10000, workers=32):
    ...     print key, len(content)
    """
    if accept_key is None:
        accept_key = lambda key: True

    #
    # If people insist on giving us bucket instances, silently extract the name
    # before moving on. Works for boto3 as well as boto.
    #
    try:
        bucket_name = bucket_name.name
    except AttributeError:
        pass

    total_size, key_no = 0, -1
    key_iterator = _list_bucket(bucket_name, prefix=prefix, accept_key=accept_key)
    download_key = functools.partial(_download_key, bucket_name=bucket_name, retries=retries)

    with _create_process_pool(processes=workers) as pool:
        result_iterator = pool.imap_unordered(download_key, key_iterator)
        for key_no, (key, content) in enumerate(result_iterator):
            if True or key_no % 1000 == 0:
                logger.info("yielding key #%i: %s, size %i (total %.1fMB)",
                            key_no, key, len(content), total_size / 1024.0 ** 2)

            yield key, content
            total_size += len(content)

            if key_limit is not None and key_no + 1 >= key_limit:
                # we were asked to output only a limited number of keys => we're done
                break
    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def run_jstestall(options):
    """Run the entire Javascript test suite.

    This function constructs TestControllers and runs them in subprocesses.

    Parameters
    ----------

    All parameters are passed as attributes of the options object.

    testgroups : list of str
      Run only these sections of the test suite. If empty, run all the available
      sections.

    fast : int or None
      Run the test suite in parallel, using n simultaneous processes. If None
      is passed, one process is used per CPU core. Default 1 (i.e. sequential)

    inc_slow : bool
      Include slow tests. By default, these tests aren't run.

    slimerjs : bool
      Use slimerjs if it's installed instead of phantomjs for casperjs tests.

    url : unicode
      Address:port to use when running the JS tests.

    xunit : bool
      Produce Xunit XML output. This is written to multiple foo.xunit.xml files.

    extra_args : list
      Extra arguments to pass to the test subprocesses, e.g. '-v'
    """
    to_run, not_run = prepare_controllers(options)

    def justify(ltext, rtext, width=70, fill='-'):
        ltext += ' '
        rtext = (' ' + rtext).rjust(width - len(ltext), fill)
        return ltext + rtext

    # Run all test runners, tracking execution time
    failed = []
    t_start = time.time()

    print()
    if options.fast == 1:
        # This actually means sequential, i.e. with 1 job
        for controller in to_run:
            print('Test group:', controller.section)
            sys.stdout.flush()  # Show in correct order when output is piped
            controller, res = do_run(controller, buffer_output=False)
            if res:
                failed.append(controller)
                if res == -signal.SIGINT:
                    print("Interrupted")
                    break
            print()
    else:
        # Run tests concurrently
        try:
            pool = multiprocessing.pool.ThreadPool(options.fast)
            for (controller, res) in pool.imap_unordered(do_run, to_run):
                res_string = 'OK' if res == 0 else 'FAILED'
                print(justify('Test group: ' + controller.section, res_string))
                if res:
                    controller.print_extra_info()
                    print(bytes_to_str(controller.stdout))
                    failed.append(controller)
                    if res == -signal.SIGINT:
                        print("Interrupted")
                        break
        except KeyboardInterrupt:
            return

    for controller in not_run:
        print(justify('Test group: ' + controller.section, 'NOT RUN'))

    t_end = time.time()
    t_tests = t_end - t_start
    nrunners = len(to_run)
    nfail = len(failed)
    # summarize results
    print('_'*70)
    print('Test suite completed for system with the following information:')
    print(report())
    took = "Took %.3fs." % t_tests
    print('Status: ', end='')
    if not failed:
        print('OK (%d test groups).' % nrunners, took)
    else:
        # If anything went wrong, point out what command to rerun manually to
        # see the actual errors and individual summary
        failed_sections = [c.section for c in failed]
        print('ERROR - {} out of {} test groups failed ({}).'.format(
            nfail, nrunners, ', '.join(failed_sections)), took)
        print()
        print('You may wish to rerun these, with:')
        print('  python -m notebook.jstest', *failed_sections)
        print()

    if failed:
        # Ensure that our exit code indicates failure
        sys.exit(1)
    # (tail of the enclosing per-root tree-evaluation function; its signature and the
    # computation of gold, pred, the edge counts and path totals are not shown here;
    # note that matthews_score is only bound when gold is non-empty)
    if len(gold) > 0:
        matthews_score = matthews_corrcoef(gold, pred)
    if within_tree == 0 or nb_paths == 0:
        return (root, -5, -5, -5, -5)
    return (root, outside_edges/len(tree_nodes), one_neg_edges/within_tree,
            matthews_score, total_path_length/nb_paths)


def tree_score(inside_edges, outside_edges):
    return inside_edges - outside_edges


def merge_trees(list_of_tree):
    # sort candidate trees by their score; the remaining merge logic is not part of
    # this excerpt
    list_of_tree = sorted(list_of_tree, key=lambda x: x[1])


def cbfs(root):
    return consistent_bfs(ADJACENCY, EDGE_SIGNS, root)


if __name__ == '__main__':
    # pylint: disable=C0103
    import persistent as p
    from multiprocessing import pool
    import random

    pool = pool.Pool(13)
    roots = random.sample(list(ADJACENCY.keys()), 10000)
    # one large chunk per worker: 13 processes, chunksize = len(roots) // 13
    res = pool.imap_unordered(cbfs, roots, chunksize=len(roots)//13)
    pool.close()
    pool.join()
    # results are materialised only after the pool has been joined
    p.save_var('cbfs_val.my', list(res))
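# The __main__ block above gives each of the 13 worker processes a single large chunk
# (chunksize = len(roots) // 13), which minimises inter-process traffic but gives up
# load balancing, and it only collects the results after close()/join().  A sketch of
# a more incremental variant -- smaller chunks, results consumed as they arrive, pool
# closed by a context manager -- could look like this; it assumes cbfs, ADJACENCY and
# the project-local persistent module are importable exactly as above:
import random
from multiprocessing import Pool

import persistent as p

if __name__ == '__main__':
    roots = random.sample(list(ADJACENCY.keys()), 10000)
    with Pool(13) as pool:
        # modest chunks keep all workers busy even when individual BFS runs are slow
        results = list(pool.imap_unordered(cbfs, roots, chunksize=64))
    p.save_var('cbfs_val_alt.my', results)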
def generate_apply_order(all_patches, skip_checks=False):
    """Resolve dependencies, and afterwards check if everything applies properly."""
    depends = sorted([i for i, patch in all_patches.iteritems() if not patch.disabled])
    resolved = resolve_dependencies(all_patches, depends=depends)
    max_patches = max(resolved) + 1

    if skip_checks:
        return resolved

    # Generate timestamps based on dependencies, still required for binary patches
    # Find out which files are modified by multiple patches
    modified_files = {}
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        patch.verify_time = [0]*max_patches
        patch.verify_time[i] += 1
        for j in patch.depends:
            patch.verify_time = causal_time_combine(patch.verify_time, all_patches[j].verify_time)

        for f in patch.modified_files:
            if f not in modified_files:
                modified_files[f] = []
            modified_files[f].append(i)

    # Check dependencies
    dependency_cache = _load_dict(config.path_cache)
    pool = multiprocessing.pool.ThreadPool(processes=4)
    try:
        for filename, indices in modified_files.iteritems():

            # If one of the patches is a binary patch, then we cannot / won't verify it - require dependencies in this case
            if contains_binary_patch(all_patches, indices, filename):
                if not causal_time_relation_any(all_patches, indices):
                    raise PatchUpdaterError("Because of binary patch modifying file %s the following patches need explicit dependencies: %s" %
                                            (filename, ", ".join([all_patches[i].name for i in indices])))
                continue

            original_content = get_wine_file(filename)
            original_hash = _sha256(original_content)
            selected_patches = select_patches(all_patches, indices, filename)

            # Generate a unique id based on the original content, the selected patches
            # and the dependency information. Since this information only has to be compared
            # we can throw it into a single hash.
            m = hashlib.sha256()
            m.update(original_hash)
            for i in indices:
                m.update("P%s" % selected_patches[i][0])
                for j in indices:
                    if causal_time_smaller(all_patches[j].verify_time, all_patches[i].verify_time):
                        m.update("D%s" % selected_patches[j][0])
            unique_hash = m.digest()

            # Skip checks if it matches the information from the cache
            # For backwards compatibility, convert string entries to list
            if dependency_cache.has_key(filename):
                if not isinstance(dependency_cache[filename], list):
                    dependency_cache[filename] = [dependency_cache[filename]]
                if unique_hash in dependency_cache[filename]:
                    # move the entry to the end of the list (most recently used)
                    dependency_cache[filename].append(unique_hash)
                    dependency_cache[filename].remove(unique_hash)
                    continue

            # Show a progress bar while applying the patches - this task might take some time
            chunk_size = 20
            with progressbar.ProgressBar(desc=filename, total=2 ** len(indices) / chunk_size) as progress:

                def test_apply(current):
                    set_apply = [(i, all_patches[i]) for i in current]
                    set_skip = [(i, all_patches[i]) for i in indices if i not in current]

                    # Check if there is any patch2 which depends directly or indirectly on patch1.
                    # If this is the case we found an impossible situation, which can be skipped in this test.
                    for i, patch1 in set_apply:
                        for j, patch2 in set_skip:
                            if causal_time_smaller(patch2.verify_time, patch1.verify_time):
                                return True  # we can skip this test

                    try:
                        original = original_content
                        for i, _ in set_apply:
                            original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                    except patchutils.PatchApplyError:
                        return False

                    return True  # everything is fine

                def test_apply_seq(current_list):
                    for current in current_list:
                        if not test_apply(current):
                            return current
                    return None

                iterables = []
                for i in xrange(0, len(indices) + 1):
                    iterables.append(itertools.combinations(indices, i))
                it = _split_seq(itertools.chain(*iterables), chunk_size)
                for k, failed in enumerate(pool.imap_unordered(test_apply_seq, it)):
                    if failed is not None:
                        progress.finish("<failed to apply>")
                        raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in failed])))
                    progress.update(k)

            # Update the dependency cache, store max 10 entries per file
            if not dependency_cache.has_key(filename):
                dependency_cache[filename] = []
            dependency_cache[filename].append(unique_hash)
            dependency_cache[filename] = dependency_cache[filename][-10:]

        # Delete outdated cache information
        for filename in dependency_cache.keys():
            if not modified_files.has_key(filename):
                del dependency_cache[filename]
    finally:
        pool.close()
        _save_dict(config.path_cache, dependency_cache)

    return resolved
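# _split_seq is an internal helper that is not shown in this excerpt.  Judging by its
# use above (turning a potentially huge iterator of patch combinations into fixed-size
# work packages for pool.imap_unordered), a minimal sketch could look like the
# following; this is an assumption for illustration, not the actual implementation:
import itertools


def _split_seq(iterable, size):
    """Yield successive chunks of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, size))
        if not chunk:
            break
        yield chunk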
def generate_script(all_patches):
    """Resolve dependencies, and afterwards check if everything applies properly."""
    depends = sorted([i for i, patch in all_patches.iteritems() if not patch.disabled])
    resolved = resolve_dependencies(all_patches, depends=depends)
    max_patches = max(resolved) + 1

    # Generate timestamps based on dependencies, still required for binary patches
    # Find out which files are modified by multiple patches
    modified_files = {}
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        patch.verify_time = [0]*max_patches
        patch.verify_time[i] += 1
        for j in patch.depends:
            patch.verify_time = causal_time_combine(patch.verify_time, all_patches[j].verify_time)

        for f in patch.modified_files:
            if f not in modified_files:
                modified_files[f] = []
            modified_files[f].append(i)

    # Check dependencies
    pool = multiprocessing.pool.ThreadPool(processes=4)
    try:

        # Checking all dependencies takes a very long time, so to improve development speed,
        # run a first quick check with all patches enabled.
        with progressbar.ProgressBar(desc="pre-check ...", total=len(modified_files)) as progress:
            for k, (filename, indices) in enumerate(modified_files.iteritems()):

                # If one of the patches is a binary patch, then we cannot / won't verify it - require dependencies in this case
                if contains_binary_patch(all_patches, indices, filename):
                    if not causal_time_relation_any(all_patches, indices):
                        raise PatchUpdaterError("Because of binary patch modifying file %s the following patches need explicit dependencies: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in indices])))
                    continue

                original = get_wine_file(filename)
                selected_patches = select_patches(all_patches, indices, filename)
                set_apply = [(i, all_patches[i]) for i in indices]

                try:
                    for i, patch in set_apply:
                        original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                except patchutils.PatchApplyError:
                    progress.finish("<failed to apply>")
                    raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                            (filename, ", ".join([all_patches[i].name for i in indices])))
                progress.update(k)

        # More detailed checks, required to make sure that dependencies are set correctly
        for filename, indices in modified_files.iteritems():

            if contains_binary_patch(all_patches, indices, filename):
                continue

            original_content = get_wine_file(filename)
            selected_patches = select_patches(all_patches, indices, filename)

            # Show a progress bar while applying the patches - this task might take some time
            chunk_size = 20
            with progressbar.ProgressBar(desc=filename, total=2 ** len(indices) / chunk_size) as progress:

                def test_apply(bitstring):
                    set_apply = [(i, all_patches[i]) for u, i in zip(bitstring, indices) if u]
                    set_skip = [(i, all_patches[i]) for u, i in zip(bitstring, indices) if not u]

                    # Check if there is any patch2 which depends directly or indirectly on patch1.
                    # If this is the case we found an impossible situation, which can be skipped in this test.
                    for i, patch1 in set_apply:
                        for j, patch2 in set_skip:
                            if causal_time_smaller(patch2.verify_time, patch1.verify_time):
                                return True  # we can skip this test

                    try:
                        original = original_content
                        for i, patch in set_apply:
                            original = patchutils.apply_patch(original, selected_patches[i][1], fuzz=0)
                    except patchutils.PatchApplyError:
                        return False

                    return True  # everything is fine

                def test_apply_seq(bitstrings):
                    for bitstring in bitstrings:
                        if not test_apply(bitstring):
                            return False
                    return True

                it = _split_seq(itertools.product([0, 1], repeat=len(indices)), chunk_size)
                for k, res in enumerate(pool.imap_unordered(test_apply_seq, it)):
                    if not res:
                        progress.finish("<failed to apply>")
                        raise PatchUpdaterError("Changes to file %s don't apply: %s" %
                                                (filename, ", ".join([all_patches[i].name for i in indices])))
                    progress.update(k)
    finally:
        pool.close()

    # Generate code for helper functions
    lines = []
    lines.append("# Enable or disable all patchsets\n")
    lines.append("patch_enable_all ()\n")
    lines.append("{\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        patch.variable = "enable_%s" % patch.name.replace("-", "_").replace(".", "_")
        lines.append("\t%s=\"$1\"\n" % patch.variable)
    lines.append("}\n")
    lines.append("\n")
    lines.append("# Enable or disable a specific patchset\n")
    lines.append("patch_enable ()\n")
    lines.append("{\n")
    lines.append("\tcase \"$1\" in\n")
    for i, patch in sorted([(i, all_patches[i]) for i in resolved], key=lambda x: x[1].name):
        lines.append("\t\t%s)\n" % patch.name)
        lines.append("\t\t\t%s=\"$2\"\n" % patch.variable)
        lines.append("\t\t\t;;\n")
    lines.append("\t\t*)\n")
    lines.append("\t\t\treturn 1\n")
    lines.append("\t\t\t;;\n")
    lines.append("\tesac\n")
    lines.append("\treturn 0\n")
    lines.append("}\n")
    lines_helpers = lines

    # Generate code for dependency resolver
    lines = []
    for i, patch in [(i, all_patches[i]) for i in reversed(resolved)]:
        if len(patch.depends):
            lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable)
            for j in sorted(patch.depends):
                lines.append("\tif test \"$%s\" -gt 1; then\n" % all_patches[j].variable)
                lines.append("\t\tabort \"Patchset %s disabled, but %s depends on that.\"\n" %
                             (all_patches[j].name, patch.name))
                lines.append("\tfi\n")
            for j in sorted(patch.depends):
                lines.append("\t%s=1\n" % all_patches[j].variable)
            lines.append("fi\n\n")
    lines_resolver = lines

    # Generate code for applying all patchsets
    lines = []
    for i, patch in [(i, all_patches[i]) for i in resolved]:
        lines.append("# Patchset %s\n" % patch.name)
        lines.append("# |\n")

        # List all bugs fixed by this patchset
        if any([bugid is not None for bugid, bugname in patch.fixes]):
            lines.append("# | This patchset fixes the following Wine bugs:\n")
            for bugid, bugname in patch.fixes:
                if bugid is not None:
                    lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap("[#%d] %s" % (bugid, bugname), 120)))
            lines.append("# |\n")

        # List all modified files
        lines.append("# | Modified files:\n")
        lines.append("# | *\t%s\n" % "\n# | \t".join(textwrap.wrap(", ".join(sorted(patch.modified_files)), 120)))
        lines.append("# |\n")

        lines.append("if test \"$%s\" -eq 1; then\n" % patch.variable)
        for f in patch.files:
            lines.append("\tpatch_apply %s\n" % os.path.join(patch.name, f))
        if len(patch.patches):
            lines.append("\t(\n")
            for p in _unique(patch.patches, key=lambda p: (p.patch_author, p.patch_subject, p.patch_revision)):
                lines.append("\t\techo '+ { \"%s\", \"%s\", %d },';\n" %
                             (_escape(p.patch_author), _escape(p.patch_subject), p.patch_revision))
            lines.append("\t) >> \"$patchlist\"\n")
        lines.append("fi\n\n")
    lines_apply = lines

    with open(config.path_template_script) as template_fp:
        template = template_fp.read()
    with open(config.path_script, "w") as fp:
        fp.write(template.format(patch_helpers="".join(lines_helpers).rstrip("\n"),
                                 patch_resolver="".join(lines_resolver).rstrip("\n"),
                                 patch_apply="".join(lines_apply).rstrip("\n")))

    # Add changes to git
    subprocess.call(["git", "add", config.path_script])