Example #1
    def _parallel_run_many(self, generator, execution: MLClientCtx,
                           runobj: RunObject) -> RunList:
        results = RunList()
        tasks = generator.generate(runobj)
        handler = runobj.spec.handler
        self._force_handler(handler)
        set_paths(self.spec.pythonpath)
        _, handler = self._get_handler(handler)

        client, function_name = self._get_dask_client(generator.options)
        parallel_runs = generator.options.parallel_runs or 4
        queued_runs = 0
        num_errors = 0

        def process_result(future):
            nonlocal num_errors
            resp, sout, serr = future.result()
            runobj = RunObject.from_dict(resp)
            try:
                log_std(self._db_conn, runobj, sout, serr, skip=self.is_child)
                resp = self._update_run_state(resp)
            except RunError as err:
                resp = self._update_run_state(resp, err=str(err))
                num_errors += 1
            results.append(resp)
            if num_errors > generator.max_errors:
                logger.error("max errors reached, stopping iterations!")
                return True
            run_results = resp["status"].get("results", {})
            stop = generator.eval_stop_condition(run_results)
            if stop:
                logger.info(
                    f"reached early stop condition ({generator.options.stop_condition}), stopping iterations!"
                )
            return stop

        completed_iter = as_completed([])
        for task in tasks:
            resp = client.submit(remote_handler_wrapper, task.to_json(),
                                 handler, self.spec.workdir)
            completed_iter.add(resp)
            queued_runs += 1
            if queued_runs >= parallel_runs:
                future = next(completed_iter)
                early_stop = process_result(future)
                queued_runs -= 1
                if early_stop:
                    break

        for future in completed_iter:
            process_result(future)

        client.close()
        if function_name and generator.options.teardown_dask:
            logger.info("tearing down the dask cluster..")
            mlrun.get_run_db().delete_runtime_object("dask",
                                                     function_name,
                                                     force=True)

        return results
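Note: Example #1 keeps at most parallel_runs tasks in flight by adding new futures to a single as_completed iterator and pulling one finished future with next() before submitting more. A self-contained sketch of that pattern, assuming only the distributed package (the work helper and the numbers are illustrative):

from distributed import Client, LocalCluster, as_completed

def work(x):
    return x * x

if __name__ == "__main__":
    # a throwaway local cluster, just for the sketch
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster, Client(cluster) as client:
        max_in_flight = 4
        in_flight = 0
        results = []

        ac = as_completed([])                       # start empty, add futures as we go
        for x in range(20):
            ac.add(client.submit(work, x))
            in_flight += 1
            if in_flight >= max_in_flight:
                results.append(next(ac).result())   # block until one task finishes
                in_flight -= 1

        for future in ac:                           # drain whatever is still pending
            results.append(future.result())
        print(sorted(results))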
Example #2
def execute_list_of_clustering_tasks_chunked(clustering_tasks,
                                             tests_per_batch=None):
    if tests_per_batch is None:
        tests_per_batch = len(clustering_tasks)

    with Client(address=SCHEDULER_HOSTNAME) as client:
        nb_of_chunks = math.ceil(len(clustering_tasks) / tests_per_batch)
        for chunk in tqdm(chunks(clustering_tasks, tests_per_batch),
                          desc="chunks",
                          total=nb_of_chunks):
            dataset_dict = scatter_datasets(client)
            # tasks_ready_to_execute = fill_dataset_in_clustering_tasks(chunk, dataset_dict)
            futures = []
            for task in chunk:
                futures.append(
                    client.submit(ClusteringTask.run,
                                  task,
                                  dataset_dict[task.dataset_name],
                                  pure=False))
            # futures = client.map(lambda x: ClusteringTask.run(x), tasks_ready_to_execute, pure = False)
            for _ in tqdm(distributed.as_completed(futures,
                                                   with_results=True,
                                                   raise_errors=False),
                          total=len(futures),
                          desc="tasks in chunk",
                          leave=False):
                pass
Example #3
def get_minmax(varname, varpath, client):

    futures = list()
    numfiles = 0
    for root, _, files in os.walk(varpath):
        if not files:
            continue
        numfiles += len(files)
        for f in sorted(files):
            inpath = os.path.join(root, f)
            # use a running global index so results from every directory land
            # in a unique slot when they are reassembled below
            futures.append(client.submit(run_minmax, inpath, varname, len(futures)))

    pbar = tqdm(total=numfiles, desc="{}".format(varname))
    mins = []
    mins2d = [[] for x in range(numfiles)]
    maxs = []
    maxs2d = [[] for x in range(numfiles)]
    for future, minmax in as_completed(futures, with_results=True):
        pbar.update(1)
        mins2d[minmax[2]] = minmax[0]
        maxs2d[minmax[2]] = minmax[1]

    pbar.close()
    for l in mins2d:
        for i in l:
            mins.append(i)
    for l in maxs2d:
        for i in l:
            maxs.append(i)
    return mins, maxs
Example #4
def main():

    args = parse_args()
    cluster = LocalCluster(
        n_workers=int(args.processes),
        threads_per_worker=1,
        interface='lo')
    client = Client(address=cluster.scheduler_address)

    contents = os.listdir(args.input)

    futures = []
    with open(args.hashfile, 'r') as hashfile:
        for line in hashfile:
            name, expected_hash = line.strip().split('|')
            _, name = os.path.split(name)
            if name not in contents:
                continue
            futures.append(
                client.submit(
                    checkhash, 
                    os.path.join(args.input, name), 
                    expected_hash))
    
    for future in as_completed(futures):
        path, match = future.result()
        print(path, match)
Example #5
    def sample_problems(self,
                        n_min_objects=1,
                        n_max_objects=12,
                        n_configs=100,
                        workspace=((0.25, -0.25), (0.75, 0.25)),
                        radius=0.0375,
                        timeout=2.0,
                        n_max_iter=50):
        """
        n_min_objects, n_max_objects: interval of number of objects to consider for the problems
        n_configs: number of configurations for each number of objects
        workspace: workspace in which to sample objects
        radius: radius for collision checks
        timeout: time limit that defines when a configuration should be discarded
        because more objects cannot be added
        n_max_iter: Number of times we allow timeout to be reached
        """
        workspace = np.asarray(workspace).tolist()
        self.infos = dict(workspace=workspace, radius=radius)

        def sample_state(n_objects):
            sampler = StateSampler(workspace=workspace, radius=radius)
            n_iter = 0
            success = False
            while not success:
                if n_iter > n_max_iter:
                    raise SamplerError('Too many objects for this workspace.')
                try:
                    state = sampler(n_objects, max_time=timeout)
                    success = True
                except SamplerError:
                    success = False
                    n_iter += 1
            return state

        def sample_src_tgt(n_objects, seed):
            np.random.seed(seed)
            src = sample_state(n_objects)
            tgt = sample_state(n_objects)
            return src, tgt, n_objects

        eval_df = defaultdict(list)
        futures = []
        for n in np.arange(n_min_objects, n_max_objects + 1):
            for _ in range(n_configs):
                fut = self.client.submit(sample_src_tgt, n,
                                         np.random.randint(2**32 - 1))
                futures.append(fut)

        print("Sampling configurations ...")
        for fut in tqdm(as_completed(futures), total=len(futures)):
            src, tgt, n_objects = fut.result()
            eval_df['src'].append(src)
            eval_df['tgt'].append(tgt)
            eval_df['n_objects'].append(n_objects)

        self.eval_df = pd.DataFrame(eval_df)
        self.eval_df = self.eval_df.sort_values('n_objects').reset_index(
            drop=True)
Example #6
    def run(self):
        """ Execute the algorithm. """
        self.start_computing_time = time.time()

        population_to_evaluate = self.create_initial_solutions()
        task_pool = as_completed(self.evaluate(population_to_evaluate))

        self.init_progress()

        auxiliar_population = []
        for future in task_pool:
            # The initial population is not full
            if len(auxiliar_population) < self.population_size:
                received_solution = future.result()
                auxiliar_population.append(received_solution)

                new_task = self.client.submit(self.problem.evaluate,
                                              self.problem.create_solution())
                task_pool.add(new_task)
            # Perform an algorithm step to create a new solution to be evaluated
            else:
                offspring_population = []

                if not self.stopping_condition_is_met():
                    offspring_population.append(future.result())

                    # Replacement
                    join_population = auxiliar_population + offspring_population
                    auxiliar_population = RankingAndCrowdingDistanceSelection(
                        self.population_size).execute(join_population)

                    # Selection
                    mating_population = []

                    for _ in range(2):
                        solution = self.selection_operator.execute(
                            population_to_evaluate)
                        mating_population.append(solution)

                    # Reproduction and evaluation
                    new_task = self.client.submit(reproduction,
                                                  mating_population,
                                                  self.problem,
                                                  self.crossover_operator,
                                                  self.mutation_operator)

                    task_pool.add(new_task)
                else:
                    for future in task_pool.futures:
                        future.cancel()
                    break

                self.evaluations += 1
                self.solutions = auxiliar_population

                self.update_progress()

        self.total_computing_time = time.time() - self.start_computing_time
        self.solutions = auxiliar_population
Example #7
 def end(self) -> None:
     if not self.client:
         raise AirflowException(NOT_STARTED_MESSAGE)
     if not self.futures:
         raise AirflowException(NOT_STARTED_MESSAGE)
     self.client.cancel(list(self.futures.keys()))
     for future in as_completed(self.futures.copy()):
         self._process_future(future)
Example #8
def executor_map(func, iterable):
    """ orderless parallel map """
    # TODO: queues..
    import distributed # massively slow to import...
    executor = distributed.Executor()
    futures = executor.map(func, iterable) # should return immediately
    for future in distributed.as_completed(futures): # blocks
        assert future.done()
        yield future.result()     
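Note: distributed.Executor is an older name for what is now distributed.Client. A hedged, self-contained sketch of the same orderless-map idea against the current API (the square helper and the orderless_map name are illustrative):

from distributed import Client, LocalCluster, as_completed

def square(x):
    return x * x

def orderless_map(func, iterable, client):
    """Yield results as their tasks finish, not in submission order."""
    futures = client.map(func, iterable)
    for future in as_completed(futures):
        yield future.result()

if __name__ == "__main__":
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster, Client(cluster) as client:
        print(sorted(orderless_map(square, range(5), client)))   # [0, 1, 4, 9, 16]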
Example #9
    def mysum():
        result = 0
        sub_tasks = [delayed(double)(i) for i in range(100)]

        with worker_client() as lc:
            futures = lc.compute(sub_tasks)
            for f in as_completed(futures):
                result += f.result()
        return result
Example #10
    def mysum():
        result = 0
        sub_tasks = [delayed(double)(i) for i in range(100)]

        with worker_client() as lc:
            futures = lc.compute(sub_tasks)
            for f in as_completed(futures):
                result += f.result()
        return result
Example #11
    def eval_solver(self, solver, method_name='MyMethod', add_fields=[]):
        """
        solver: The solver to evaluate.
        method_name: Name of the method to evaluate; this name will appear in the results dataframe.
        add_fields: List containing additional fields that should be saved. These fields
        must be present in the output dict of the solver.
        """
        all_outputs = defaultdict(list)
        save_fields = ['success', 'n_moves', 'n_collision_checks'
                       ] + list(add_fields)

        def run_solver(solver, problem, problem_idx):
            outputs = solver(problem)
            assert "solved" in outputs
            if outputs['solved']:
                assert 'actions' in outputs
                actions = outputs['actions']
                # Verify that the problem is indeed solved.
                # This will stop the evaluation if the action sequence found doesn't solve the problem
                # or doesn't respect the constraints (objects within workspace and no collisions)
                problem.assert_solution_valid(actions)
                outputs['n_moves'] = len(actions)
                outputs['success'] = True
            else:
                outputs['success'] = False
                outputs['n_moves'] = np.nan
                outputs['n_collision_checks'] = np.nan
            return outputs, problem_idx

        futures = []
        print(f"{method_name} evaluation ...")
        for row_idx, row in enumerate(self.eval_df.itertuples()):
            src = row.src
            tgt = row.tgt
            problem = RearrangementProblem(src=src,
                                           tgt=tgt,
                                           workspace=self.infos['workspace'],
                                           radius=self.infos['radius'])
            fut = self.client.submit(run_solver, solver, problem, row_idx)
            futures.append(fut)

        for fut in tqdm(as_completed(futures), total=len(futures)):
            outputs, problem_idx = fut.result()
            all_outputs['problem_idx'].append(problem_idx)
            for k in save_fields:
                all_outputs[f'{method_name}/{k}'].append(outputs[k])
        print("Done")
        sort_ids = np.argsort(all_outputs['problem_idx'])
        del all_outputs['problem_idx']
        all_outputs = pd.DataFrame(all_outputs)
        all_outputs = all_outputs.iloc[sort_ids].reset_index(drop=True)
        self.eval_df = pd.concat((self.eval_df, all_outputs), axis=1)
        return
Example #12
def test_as_completed(client):
    ac = client.submit(Counter, actor=True).result()
    futures = [ac.increment() for _ in range(10)]
    max = 0

    for future in as_completed(futures):
        value = future.result()
        if value > max:
            max = value

    assert all(future.done() for future in futures)
    assert max == 10
Example #13
    def solve(self):
        # start off some particles
        futures = []
        for particle in self.particles:  # [:max(int((self.num_workers + 1) / self.replications), 1)]:
            futures.append(self.create_parallel_particle_future(particle.name))
            # time.sleep(60)

        completed = as_completed(futures, with_results=True)

        for batch in completed.batches():
            for future, (particle_num, score, position, velocity) in batch:
                self.particles[particle_num].update_score_position_velocity(
                    score, position, velocity)

                # particle not running anymore
                self.particles_running[particle_num] = False
                # update the epoch
                self.particle_epochs_completed[particle_num] += 1
                # see if there's a new best score
                self.update_global(score, position)

                # print("-- tassie devil\n{}\n{}\n{}\n".format([particle.name for particle in self.particles],
                #                                              self.particle_epochs_completed,
                #                                              self.particles_running))

                # find the next particle (min epochs done and not currently running)
                min_epochs = sys.maxsize
                next_particle_pos = -1
                for pos, (is_running, epochs_done) in enumerate(
                        zip(self.particles_running,
                            self.particle_epochs_completed)):
                    if not is_running and epochs_done < min_epochs:
                        min_epochs = epochs_done
                        next_particle_pos = pos
                # print("min: {}".format(min_epochs))

                if min_epochs < self.iterations:
                    # not done yet - find the min particle
                    particle = self.particles[next_particle_pos]

                    # print("index={}, particle_num={}".format(particle_num, particle.name))

                    # update for the next run
                    particle.update_velocity(self.global_best_position)
                    particle.update_position()

                    # score the particle position
                    completed.add(
                        self.create_parallel_particle_future(particle_num))
        # do something with the results now
        print("=========== Done ({}) ===============".format(self.iterations))
        print(self)
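Note: Example #13 relies on as_completed(..., with_results=True).batches(), which yields lists of (future, result) pairs that finished since the previous iteration, so results can be handled in groups while new futures are added to the same iterator. A minimal self-contained sketch (the work helper is illustrative):

from distributed import Client, LocalCluster, as_completed

def work(x):
    return x, x * x

if __name__ == "__main__":
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster, Client(cluster) as client:
        futures = [client.submit(work, i) for i in range(10)]
        completed = as_completed(futures, with_results=True)
        for batch in completed.batches():              # one or more finished tasks per batch
            for future, (x, squared) in batch:
                print(x, squared)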
Example #14
def test_as_completed_async_for(c, s, a, b):
    futures = c.map(inc, range(10))
    ac = as_completed(futures)
    results = []

    async def f():
        async for future in ac:
            result = await future
            results.append(result)

    yield f()

    assert set(results) == set(range(1, 11))
Example #15
def test_as_completed_async_for_results(c, s, a, b):
    futures = c.map(inc, range(10))
    ac = as_completed(futures, with_results=True)
    results = []

    async def f():
        async for future, result in ac:
            results.append(result)

    yield f()

    assert set(results) == set(range(1, 11))
    assert not s.counters['op'].components[0]['gather']
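Note: Examples #14 and #15 are internal distributed tests driven by a test harness (hence the yield f() pattern). In application code the same async iteration is available from an asynchronous Client; a self-contained sketch, assuming a recent distributed version (the inc helper is illustrative):

import asyncio
from distributed import Client, as_completed

def inc(x):
    return x + 1

async def main():
    # asynchronous=True makes the client awaitable and enables `async for`
    async with Client(asynchronous=True, processes=False) as client:
        futures = client.map(inc, range(10))
        results = []
        async for future in as_completed(futures):
            results.append(await future)
        print(sorted(results))

if __name__ == "__main__":
    asyncio.run(main())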
Example #16
 def iterate():
     locs = map(coords.get_loc, coords.drop_duplicates())
     if client is not None:
         futures = client.map(
             self._compute_anomaly,
             [data.isel(**{agedim: sl}) for sl in locs],
             **kwargs)
         for future, result in distributed.as_completed(
                 futures, with_results=True):
             futures.remove(future)
             yield result
     else:
         for sl in locs:
             yield self._compute_anomaly(data.isel(**{agedim: sl}),
                                         **kwargs)
Example #17
def eval_population(population, client, context=context):
    """ Concurrently evaluate all the individuals in the given population

    :param population: to be evaluated
    :param client: dask client
    :param context: for storing count of non-viable individuals
    :return: dask distrib iterator for futures
    """
    # farm out population to worker nodes for evaluation
    worker_futures = client.map(evaluate(context=context),
                                population,
                                pure=False)

    # We'll need this later to catch eval tasks as they complete, and to
    # submit new tasks.
    return distributed.as_completed(worker_futures)
Example #18
def record_dataset_dask(client, ds_dir,
                        scene_cls, scene_kwargs,
                        n_chunks, n_frames_per_chunk,
                        start_seed=0, resume=False):

    seeds = set(range(start_seed, start_seed + n_chunks))
    if resume:
        done_seeds = (ds_dir / 'seeds_recorded.txt').read_text().strip().split('\n')
        seeds = set(seeds) - set(map(int, done_seeds))
        all_keys = (ds_dir / 'keys_recorded.txt').read_text().strip().split('\n')
    else:
        all_keys = []
    seeds = tuple(seeds)

    future_kwargs = []
    for seed in seeds:
        kwargs = dict(ds_dir=ds_dir, seed=seed,
                      n_frames=n_frames_per_chunk,
                      scene_cls=scene_cls,
                      scene_kwargs=scene_kwargs)
        future_kwargs.append(kwargs)

    futures = []
    for kwargs in future_kwargs:
        futures.append(client.submit(record_chunk, **kwargs))

    iterator = as_completed(futures)
    unit = 'frame'
    unit_scale = n_frames_per_chunk
    n_futures = len(future_kwargs)
    tqdm_iterator = tqdm(iterator, total=n_futures, unit_scale=unit_scale, unit=unit, ncols=80)

    seeds_file = open(ds_dir / 'seeds_recorded.txt', 'a')
    keys_file = open(ds_dir / 'keys_recorded.txt', 'a')

    for future in tqdm_iterator:
        keys, seed = future.result()
        all_keys += keys
        seeds_file.write(f'{seed}\n')
        seeds_file.flush()
        keys_file.write('\n'.join(keys) + '\n')
        keys_file.flush()
        client.cancel(future)

    seeds_file.close()
    keys_file.close()
    return all_keys
Example #19
    def pytest_runtestloop(self, session):
        if (session.testsfailed
                and not session.config.option.continue_on_collection_errors):
            raise session.Interrupted("%d errors during collection" %
                                      session.testsfailed)

        unregister_plugins = ['debugging', 'terminalreporter']
        for p in unregister_plugins:
            session.config.pluginmanager.unregister(p)

        if session.config.option.collectonly:
            return True

        def generate_tasks(session):
            for i, item in enumerate(session.items):

                # @delayed(pure=False)
                def run_test(_item):
                    # ensure that the plugin manager gets recreated appropriately.
                    _item.config.pluginmanager.__recreate__()
                    results = self.pytest_runtest_protocol(item=_item,
                                                           nextitem=None)
                    return results

                # hook = item.ihook
                # try to ensure that the module gets treated as a dynamic module that does not
                # exist.

                # delattr(item.module, '__file__')
                # setup = hook.pytest_runtest_setup
                # make_report = hook.pytest_runtest_makereport

                fut = self.client.submit(run_test, item, pure=False)
                yield fut

        with self.remote_syspath_ctx():
            tasks = generate_tasks(session)

            # log these reports to the console.
            for resolved in as_completed(tasks):
                t = resolved.result()
                for report in t:
                    session.ihook.pytest_runtest_logreport(report=report)

        return True
Example #20
def run_actions(unit_context,
                loop_variable_name=None,
                loop_variable_value=None,
                in_loop=None,
                on_dask=None):
    results = []
    triggers = []
    if in_loop:
        if on_dask:
            futures = []
            client = DaskClient().get_dask_client()
            for value in unit_context.stageContext.pipelineContext.variables[
                    loop_variable_name]:
                futures.append(
                    client.submit(run_actions,
                                  unit_context,
                                  loop_variable_name,
                                  value,
                                  False,
                                  False,
                                  pure=False))
            for future in as_completed(futures):
                result, trigger_pipeline_data_list = future.result()
                results.extend(result)
                triggers.extend(trigger_pipeline_data_list)
        else:
            for value in unit_context.stageContext.pipelineContext.variables[
                    loop_variable_name]:
                result, trigger_pipeline_data_list = run_actions(
                    unit_context, loop_variable_name, value, False, False)
                results.extend(result)
                triggers.extend(trigger_pipeline_data_list)
    else:
        for action in unit_context.unit.do:
            action_context = ActionContext(unit_context, action)
            if loop_variable_name and loop_variable_value:
                action_context.delegateVariableName = loop_variable_name
                action_context.delegateValue = loop_variable_value
            result, trigger_pipeline_data_list = run_action(action_context)
            results.append(result)
            triggers.extend(trigger_pipeline_data_list)
    return results, triggers
Example #21
def test_as_completed_async_for_cancel(c, s, a, b):
    x = c.submit(inc, 1)
    y = c.submit(sleep, 0.3)
    ac = as_completed([x, y])

    async def _():
        await gen.sleep(0.1)
        await y.cancel(asynchronous=True)

    c.loop.add_callback(_)

    L = []

    async def f():
        async for future in ac:
            L.append(future)

    yield f()

    assert L == [x, y]
Example #22
def execute_list_of_futures_with_dataset_requirements(tuple_list,
                                                      tests_per_batch=None):
    """

    :param tuple_list: list of tuples (dataset_name, f(dataset_future) -> future)
    :param tests_per_batch: how many tests per batch you want
    :return:
    """
    if tests_per_batch is None:
        tests_per_batch = len(tuple_list)

    with Client(address=SCHEDULER_HOSTNAME) as client:
        for chunk in chunks(tuple_list, tests_per_batch):
            dataset_dict = scatter_datasets(client)
            futures = fill_dataset_requirements(chunk, client, dataset_dict)
            for _ in tqdm(distributed.as_completed(futures,
                                                   with_results=True,
                                                   raise_errors=True),
                          total=len(futures)):
                pass
            client.restart()
Example #23
def execute_list_of_clustering_tasks(clustering_tasks, tests_per_batch=None):
    if len(clustering_tasks) == 0:
        print("all clustering tasks done")
    if tests_per_batch is None:
        tests_per_batch = len(clustering_tasks)

    with Client(address=SCHEDULER_HOSTNAME) as client:
        with tqdm(total=len(clustering_tasks)) as pbar:

            tasks_to_still_do = deque(clustering_tasks)
            dataset_dict = scatter_datasets(client)
            futures = []
            for _ in range(min(tests_per_batch, len(clustering_tasks))):
                task = tasks_to_still_do.popleft()
                futures.append(
                    client.submit(ClusteringTask.run,
                                  task,
                                  dataset_dict[task.dataset_name],
                                  pure=False))

            as_completed_futures = distributed.as_completed(futures,
                                                            with_results=True,
                                                            raise_errors=False)
            for future, _ in as_completed_futures:
                pbar.update(1)
                f: Future = future
                if f.exception() is not None:
                    t = f.traceback()
                    # for line in t.format():
                    #     print(line, end = "")
                    # traceback.print_exception()f.exception()
                    traceback.print_tb(f.traceback())
                    print(f.exception())
                if len(tasks_to_still_do) > 0:
                    task = tasks_to_still_do.popleft()
                    future = client.submit(ClusteringTask.run,
                                           task,
                                           dataset_dict[task.dataset_name],
                                           pure=False)
                    as_completed_futures.add(future)
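Note: Example #23 keeps a fixed-size window of running tasks and refills it by calling add() on the as_completed iterator whenever a task finishes. A self-contained sketch of that refill pattern (the work helper and the window size are illustrative):

from collections import deque
from distributed import Client, LocalCluster, as_completed

def work(x):
    return x * x

if __name__ == "__main__":
    with LocalCluster(n_workers=2, threads_per_worker=1) as cluster, Client(cluster) as client:
        pending = deque(range(20))
        window = 4                                     # tasks to keep in flight
        futures = [client.submit(work, pending.popleft(), pure=False)
                   for _ in range(min(window, len(pending)))]
        ac = as_completed(futures, with_results=True)
        for future, result in ac:
            print(result)
            if pending:                                # keep the window full
                ac.add(client.submit(work, pending.popleft(), pure=False))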
Example #24
    def _consume_jobs(self, chunk_size=None):
        """Consumes jobs

        If chunk_size is set, the function consumes the specified number of
        finished tasks, or fewer if the sent_jobs_ids queue becomes empty.
        If chunk_size is None, the function consumes jobs until the
        sent_jobs_ids queue becomes empty.
        Jobs with the statuses Cancelled, Abandoned or Terminated will be
        resent and their ids added to the sent_jobs_ids queue.

        :param chunk_size: size of consuming chunk
        :return: generator on job results
        """
        logger.debug("Consuming jobs started")

        jobs_to_consume = []
        while not self.sent_jobs.empty():
            job = self.sent_jobs.get()
            jobs_to_consume.append(job)

            if chunk_size is not None:
                chunk_size -= 1
                if chunk_size <= 0:
                    break

        for ready_job in distributed.as_completed(jobs_to_consume):
            results = ready_job.result()
            self.sent_jobs_count -= 1

            for result in results:
                (node_id, serialized), exc = result
                logger.debug(
                    "Got ready task for node %s, serialized: %s, "
                    "error: %s", node_id, serialized, exc)
                if exc is not None:
                    raise exc
                yield node_id, serialized

        logger.debug("Consuming jobs finished")
Example #25
def parallelStatsDaskSimple(urlSplits,
                            ds,
                            nEpochs,
                            variable,
                            mask,
                            coordinates,
                            reader,
                            outHdfsPath,
                            averagingConfig,
                            sparkConfig,
                            accumulators=['count', 'mean', 'M2', 'min',
                                          'max']):
    '''Compute N-day climatology statistics in parallel using Dask distributed.'''
    if not sparkConfig.startswith('dask,'):
        print("dask: configuration must be of form 'dask,n'", file=sys.stderr)
        sys.exit(1)
    numPartitions = int(sparkConfig.split(',')[1])

    with Timer("Configure Dask distributed"):
        from distributed import Client, as_completed
        client = Client(DaskClientEndpoint)

    print('Starting parallel Stats using Dask . . .', file=sys.stderr)
    start = time.time()
    futures = client.map(
        lambda urls: parallelStatsPipeline(
            urls, ds, nEpochs, variable, mask, coordinates, reader,
            averagingConfig, outHdfsPath, accumulators), urlSplits)

    outputFiles = []
    for future in as_completed(futures):
        outputFile = future.result()
        outputFiles.append(outputFile)
        end = time.time()
        print("parallelStats: Completed %s in %0.3f seconds." %
              (outputFile, (end - start)),
              file=sys.stderr)
    return outputFiles
Example #26
    def _consume_jobs(self, chunk_size=None):
        """Consumes jobs

        If chunk_size is set, the function consumes the specified number of
        finished tasks, or fewer if the sent_jobs_ids queue becomes empty.
        If chunk_size is None, the function consumes jobs until the
        sent_jobs_ids queue becomes empty.
        Jobs with the statuses Cancelled, Abandoned or Terminated will be
        resent and their ids added to the sent_jobs_ids queue.

        :param chunk_size: size of consuming chunk
        :return: generator on job results
        """
        logger.debug("Consuming jobs started")

        jobs_to_consume = []
        while not self.sent_jobs.empty():
            job = self.sent_jobs.get()
            jobs_to_consume.append(job)

            if chunk_size is not None:
                chunk_size -= 1
                if chunk_size <= 0:
                    break

        for ready_job in distributed.as_completed(jobs_to_consume):
            results = ready_job.result()
            self.sent_jobs_count -= 1

            for result in results:
                (node_id, serialized), exc = result
                logger.debug("Got ready task for node %s, serialized: %s, "
                             "error: %s", node_id, serialized, exc)
                if exc is not None:
                    raise exc
                yield node_id, serialized

        logger.debug("Consuming jobs finished")
Example #27
def run(ctx, path, ncores):
    path = pathlib.Path(path)
    if path.is_dir():
        paths = list(path.rglob("*.py"))
    else:
        paths = [path]

    import distributed

    with distributed.Client(n_workers=ncores) as client:
        print(client)
        futures = {}
        for p in tqdm.tqdm(sorted(paths), desc="Creating tasks"):
            fut = client.submit(run_migrators_on_file, p)
            futures[fut] = p

        progress_iterator = tqdm.tqdm(desc="Scanning :",
                                      total=len(futures),
                                      miniters=1)
        all_changed = set()
        for resolved_fut in distributed.as_completed(futures):
            path = futures[resolved_fut]
            progress_iterator.update(1)
            progress_iterator.set_description(f"Scanning: {path}")
            try:
                changed = resolved_fut.result()
                if changed:
                    all_changed.add(path)
            except CantParseException as e:
                print(f"Can't parse {path}", file=sys.stderr)
        progress_iterator.close()

        if len(all_changed):
            print("Changed the following files:")
            for path in sorted(all_changed):
                print(f"    {path}")
Example #28
    def _iterate_jobs(self):
        """
        Iterate through all jobs until the domain size is depleted or
        time runs out.
        :return: completed job
        """
        backlog_half = self.backlog_per_worker / 2
        active_futures = self._init_futures(self.backlog_per_worker)
        next_futures = []

        try:
            while ((self._has_more_work()
                    or self.index_completed < self.index_scheduled)
                   and not self.canceled):
                iterated = 0
                for future in as_completed(active_futures):
                    job = future.result()
                    self._mark_job_completed(job)
                    iterated += 1

                    self.tracer.trace_job(job)

                    if iterated >= (backlog_half * self.worker_count):
                        iterated = 0
                        if self._has_more_work():
                            next_futures += self._schedule(backlog_half)

                if self._has_more_work():
                    next_futures += self._schedule(backlog_half)

                active_futures = next_futures
                next_futures = []
        except Exception as e:
            haydi_logger.error(traceback.format_exc(e))

        self.completed = True
Example #29
    def _iterate_jobs(self):
        """
        Iterate through all jobs until the domain size is depleted or
        time runs out.
        :return: completed job
        """
        backlog_half = self.backlog_per_worker / 2
        active_futures = self._init_futures(self.backlog_per_worker)
        next_futures = []

        try:
            while ((self._has_more_work() or
                   self.index_completed < self.index_scheduled) and
                   not self.canceled):
                iterated = 0
                for future in as_completed(active_futures):
                    job = future.result()
                    self._mark_job_completed(job)
                    iterated += 1

                    self.tracer.trace_job(job)

                    if iterated >= (backlog_half * self.worker_count):
                        iterated = 0
                        if self._has_more_work():
                            next_futures += self._schedule(backlog_half)

                if self._has_more_work():
                    next_futures += self._schedule(backlog_half)

                active_futures = next_futures
                next_futures = []
        except Exception as e:
            haydi_logger.error(traceback.format_exc(e))

        self.completed = True
Example #30
        printt('[info] master > maxmin finished')
    else:
        for i in range(num_worker):
            chunk_data = client.scatter([anchors, chunks[i]])
            workers.append(client.submit(work, chunk_data, f'worker_{i}', cur_iter, *option))

    # back up the values in redis in case the iteration fails
    #entities_initialized_bak = iter_mget(r, [f'{entity}_v' for entity in entities])
    #entities_initialized_bak = np.array([loads(decompress(v)) for v in entities_initialized_bak])
    #relations_initialized_bak = iter_mget(r, [f'{relation}_v' for relation in relations])
    #relations_initialized_bak = np.array([loads(decompress(v)) for v in relations_initialized_bak])

    # client.gather(workers)
    # result_iter = [worker.result() for worker in workers]
    result_iter = []
    ac = as_completed(workers, with_results=True)
    for future, result in ac:
        result_iter.append(result)
    iterTimes.append(timeit.default_timer() - iterStart)

    if all([e[0] for e in result_iter]):

        # iteration succeeded
        printt('[info] master > iter %d - time : %f' % (cur_iter, timeit.default_timer() - timeNow))
        success = True
        trial = 0
        cur_iter += 1
        workTimes = [e[1] for e in result_iter]

        printt('[info] master > Total embedding times : ' + str(workTimes))
        # printt('[info] master > Average total embedding time : ' + str(np.mean(workTimes)))
Example #31
    def run(self, domain,
            worker_reduce_fn, worker_reduce_init,
            global_reduce_fn, global_reduce_init):
        size = domain.steps
        assert size is not None  # TODO: Iterators without size

        workers = 0
        for name, value in self.executor.ncores().items():
            workers += value

        if workers == 0:
            raise Exception("There are no workers")

        batch_count = workers * 4
        batch_size = max(int(round(size / float(batch_count))), 1)
        batches = self._create_batches(batch_size, size, domain,
                                       worker_reduce_fn, worker_reduce_init)

        logging.info("Qit: starting {} batches with size {}".format(
            batch_count, batch_size))

        if self.job_observer:
            self.job_observer.on_computation_start(batch_count, batch_size)

        futures = self.executor.map(process_batch, batches)

        if self.track_progress:
            distributed.diagnostics.progress(futures)

        if self.write_partial_results is not None:
            result_saver = ResultSaver(self.execution_count,
                                       self.write_partial_results)
        else:
            result_saver = None

        timeouted = False
        results = []

        for future in as_completed(futures):
            job = future.result()
            if result_saver:
                result_saver.handle_result(job.result)
            if self.job_observer:
                self.job_observer.on_job_completed(job)

            results.append(job.result)

            if self.timeout and self.timeout.is_finished():
                logging.info("Qit: timeouted after {} seconds".format(
                    self.timeout.timeout))
                timeouted = True
                break

        # order results
        if not timeouted:
            results = [j.result for j in self.executor.gather(futures)]

        self.execution_count += 1

        if worker_reduce_fn is None:
            results = list(itertools.chain.from_iterable(results))

        logging.info("Qit: finished run with size {} (taking {})".format(
            len(results), domain.size))

        results = results[:domain.size]  # trim results to required size

        if global_reduce_fn is None:
            return results
        else:
            if global_reduce_init is None:
                return reduce(global_reduce_fn, results)
            else:
                return reduce(global_reduce_fn, results, global_reduce_init)
Example #32
def parallel_calculate_chunks(chunks,
                              features,
                              approximate,
                              training_window,
                              verbose,
                              save_progress,
                              entityset,
                              n_jobs,
                              no_unapproximated_aggs,
                              cutoff_df_time_var,
                              target_time,
                              pass_columns,
                              dask_kwargs=None):
    from distributed import as_completed
    from dask.base import tokenize

    client = None
    cluster = None
    try:
        client, cluster = create_client_and_cluster(n_jobs=n_jobs,
                                                    num_tasks=len(chunks),
                                                    dask_kwargs=dask_kwargs)
        # scatter the entityset
        # denote future with leading underscore
        start = time.time()
        es_token = "EntitySet-{}".format(tokenize(entityset))
        if es_token in client.list_datasets():
            print("Using EntitySet persisted on the cluster as dataset %s" %
                  (es_token))
            _es = client.get_dataset(es_token)
        else:
            _es = client.scatter([entityset])[0]
            client.publish_dataset(**{_es.key: _es})

        # save features to a tempfile and scatter it
        pickled_feats = cloudpickle.dumps(features)
        _saved_features = client.scatter(pickled_feats)
        client.replicate([_es, _saved_features])
        end = time.time()
        scatter_time = end - start
        scatter_string = "EntitySet scattered to workers in {:.3f} seconds"
        print(scatter_string.format(scatter_time))

        # map chunks
        # TODO: consider handling task submission dask kwargs
        _chunks = client.map(calculate_chunk,
                             chunks,
                             features=_saved_features,
                             entityset=_es,
                             approximate=approximate,
                             training_window=training_window,
                             profile=False,
                             verbose=False,
                             save_progress=save_progress,
                             no_unapproximated_aggs=no_unapproximated_aggs,
                             cutoff_df_time_var=cutoff_df_time_var,
                             target_time=target_time,
                             pass_columns=pass_columns)

        feature_matrix = []
        iterator = as_completed(_chunks).batches()
        if verbose:
            pbar_str = ("Elapsed: {elapsed} | Remaining: {remaining} | "
                        "Progress: {l_bar}{bar}| "
                        "Calculated: {n}/{total} chunks")
            pbar = make_tqdm_iterator(total=len(_chunks), bar_format=pbar_str)
        for batch in iterator:
            results = client.gather(batch)
            for result in results:
                feature_matrix.append(result)
                if verbose:
                    pbar.update()
        if verbose:
            pbar.close()
    except Exception:
        raise
    finally:
        if 'cluster' not in dask_kwargs and cluster is not None:
            cluster.close()
        if client is not None:
            client.close()

    return feature_matrix
Example #33
def parallel_calculate_chunks(chunks, features, approximate, training_window,
                              verbose, save_progress, entityset, n_jobs,
                              no_unapproximated_aggs, cutoff_df_time_var,
                              target_time, pass_columns, dask_kwargs=None):
    from distributed import as_completed
    from dask.base import tokenize

    client = None
    cluster = None
    try:
        client, cluster = create_client_and_cluster(n_jobs=n_jobs,
                                                    num_tasks=len(chunks),
                                                    dask_kwargs=dask_kwargs,
                                                    entityset_size=entityset.__sizeof__())
        # scatter the entityset
        # denote future with leading underscore
        if verbose:
            start = time.time()
        es_token = "EntitySet-{}".format(tokenize(entityset))
        if es_token in client.list_datasets():
            if verbose:
                msg = "Using EntitySet persisted on the cluster as dataset {}"
                print(msg.format(es_token))
            _es = client.get_dataset(es_token)
        else:
            _es = client.scatter([entityset])[0]
            client.publish_dataset(**{_es.key: _es})

        # save features to a tempfile and scatter it
        pickled_feats = cloudpickle.dumps(features)
        _saved_features = client.scatter(pickled_feats)
        client.replicate([_es, _saved_features])
        if verbose:
            end = time.time()
            scatter_time = end - start
            scatter_string = "EntitySet scattered to workers in {:.3f} seconds"
            print(scatter_string.format(scatter_time))

        # map chunks
        # TODO: consider handling task submission dask kwargs
        _chunks = client.map(calculate_chunk,
                             chunks,
                             features=_saved_features,
                             entityset=_es,
                             approximate=approximate,
                             training_window=training_window,
                             profile=False,
                             verbose=False,
                             save_progress=save_progress,
                             no_unapproximated_aggs=no_unapproximated_aggs,
                             cutoff_df_time_var=cutoff_df_time_var,
                             target_time=target_time,
                             pass_columns=pass_columns)

        feature_matrix = []
        iterator = as_completed(_chunks).batches()
        if verbose:
            pbar_str = ("Elapsed: {elapsed} | Remaining: {remaining} | "
                        "Progress: {l_bar}{bar}| "
                        "Calculated: {n}/{total} chunks")
            pbar = make_tqdm_iterator(total=len(_chunks), bar_format=pbar_str)
        for batch in iterator:
            results = client.gather(batch)
            for result in results:
                feature_matrix.append(result)
                if verbose:
                    pbar.update()
        if verbose:
            pbar.close()
    except Exception:
        raise
    finally:
        if 'cluster' not in dask_kwargs and cluster is not None:
            cluster.close()
        if client is not None:
            client.close()

    return feature_matrix
Example #34
 def end(self):
     for future in distributed.as_completed(self.futures.copy()):
         self._process_future(future)
Example #35
    def execute(self):
        options = self.config["mitodistances"]
        output_dir = self.config["output-directory"]
        body_svc, mito_svc = self.init_services()

        # Resource manager context must be initialized before resource manager client
        # (to overwrite config values as needed)
        dvid_mgr_config = self.config["dvid-access-manager"]
        dvid_mgr_context = LocalResourceManager(dvid_mgr_config)
        dvid_mgr_client = ResourceManagerClient(dvid_mgr_config["server"],
                                                dvid_mgr_config["port"])

        syn_server, syn_uuid, syn_instance = (options['synapse-criteria'][k]
                                              for k in ('server', 'uuid',
                                                        'instance'))
        syn_conf = float(options['synapse-criteria']['confidence'])
        syn_types = ['PreSyn', 'PostSyn']
        if options['synapse-criteria']['type'] == 'pre':
            syn_types = ['PreSyn']
        elif options['synapse-criteria']['type'] == 'post':
            syn_types = ['PostSyn']

        bodies = load_body_list(options["bodies"], False)
        skip_flags = [
            os.path.exists(f'{output_dir}/{body}.csv') for body in bodies
        ]
        bodies_df = pd.DataFrame({'body': bodies, 'should_skip': skip_flags})
        bodies = bodies_df.query('not should_skip')['body']

        # Shuffle for better load balance?
        # TODO: Would be better to sort by synapse count, and put large bodies first,
        #       assigned to partitions in round-robin style.
        #       Then work stealing will be more effective at knocking out the smaller jobs at the end.
        #       This requires knowing all the body sizes, though.
        #       Perhaps mito count would be a decent proxy for synapse count, and it's readily available.
        #bodies = bodies.sample(frac=1.0).values

        os.makedirs('body-logs')
        os.makedirs(output_dir, exist_ok=True)

        mito_server, mito_uuid, mito_instance = (options['mito-labelmap'][k]
                                                 for k in ('server', 'uuid',
                                                           'instance'))

        @auto_retry(3)
        def _fetch_synapses(body):
            with dvid_mgr_client.access_context(syn_server, True, 1, 1):
                syn_df = fetch_annotation_label(syn_server,
                                                syn_uuid,
                                                syn_instance,
                                                body,
                                                format='pandas')
                if len(syn_df) == 0:
                    return syn_df
                # reference these closure variables here so the @-lookups in
                # the query string below can resolve them
                syn_types, syn_conf
                syn_df = syn_df.query(
                    'kind in @syn_types and conf >= @syn_conf').copy()
                return syn_df[[*'xyz', 'kind', 'conf'
                               ]].sort_values([*'xyz']).reset_index(drop=True)

        @auto_retry(3)
        def _fetch_mito_ids(body):
            with dvid_mgr_client.access_context(mito_server, True, 1, 1):
                try:
                    return fetch_supervoxels(mito_server, mito_uuid,
                                             mito_instance, body)
                except HTTPError:
                    return []

        def process_and_save(body):
            tbars = _fetch_synapses(body)
            valid_mitos = _fetch_mito_ids(body)

            # TODO:
            #   Does the stdout_redirected() mechanism work correctly in the context of multiprocessing?
            #   If not, I should probably just use a custom logging handler instead.
            with open(f"body-logs/{body}.log",
                      "w") as f, stdout_redirected(f), Timer() as timer:
                processed_tbars = []
                if len(tbars) == 0:
                    logging.getLogger(__name__).warning(
                        f"Body {body}: No synapses found")

                if len(valid_mitos) == 0:
                    logging.getLogger(__name__).warning(
                        f"Body {body}: Failed to fetch mito supervoxels")
                    processed_tbars = initialize_results(body, tbars)

                if len(valid_mitos) and len(tbars):
                    processed_tbars = measure_tbar_mito_distances(
                        body_svc,
                        mito_svc,
                        body,
                        tbars=tbars,
                        valid_mitos=valid_mitos)

            if len(processed_tbars) > 0:
                processed_tbars.to_csv(f'{output_dir}/{body}.csv',
                                       header=True,
                                       index=False)
                with open(f'{output_dir}/{body}.pkl', 'wb') as f:
                    pickle.dump(processed_tbars, f)

            if len(tbars) == 0:
                return (body, 0, 'no-synapses', timer.seconds)

            if len(valid_mitos) == 0:
                return (body, len(processed_tbars), 'no-mitos', timer.seconds)

            return (body, len(tbars), 'success', timer.seconds)

        logger.info(
            f"Processing {len(bodies)}, skipping {bodies_df['should_skip'].sum()}"
        )

        def process_batch(bodies):
            return [*map(process_and_save, bodies)]

        with dvid_mgr_context:
            batch_size = max(1, len(bodies) // 10_000)
            futures = self.client.map(process_batch,
                                      iter_batches(bodies, batch_size))

            # Support synchronous testing with a fake 'as_completed' object
            if hasattr(self.client, 'DEBUG'):
                ac = as_completed_synchronous(futures, with_results=True)
            else:
                ac = distributed.as_completed(futures, with_results=True)

            try:
                results = []
                for f, r in tqdm_proxy(ac, total=len(futures)):
                    results.extend(r)
            finally:
                results = pd.DataFrame(
                    results,
                    columns=['body', 'synapses', 'status', 'processing_time'])
                results.to_csv('results-summary.csv', header=True, index=False)
                num_errors = len(results.query('status == "error"'))
                if num_errors:
                    logger.warning(
                        f"Encountered {num_errors} errors. See results-summary.csv"
                    )
Example #36
def parallel_calculate_chunks(chunks,
                              features,
                              approximate,
                              training_window,
                              verbose,
                              save_progress,
                              entityset,
                              n_jobs,
                              no_unapproximated_aggs,
                              cutoff_df_time_var,
                              target_time,
                              pass_columns,
                              dask_kwargs=None):
    from distributed import Client, LocalCluster, as_completed
    from dask.base import tokenize

    client = None
    cluster = None
    try:
        if 'cluster' in dask_kwargs:
            cluster = dask_kwargs['cluster']
        else:
            diagnostics_port = None
            if 'diagnostics_port' in dask_kwargs:
                diagnostics_port = dask_kwargs['diagnostics_port']
                del dask_kwargs['diagnostics_port']

            workers = n_jobs_to_workers(n_jobs)
            workers = min(workers, len(chunks))
            cluster = LocalCluster(n_workers=workers,
                                   threads_per_worker=1,
                                   diagnostics_port=diagnostics_port,
                                   **dask_kwargs)
            # if cluster has bokeh port, notify user if unexpected port number
            if diagnostics_port is not None:
                if hasattr(cluster, 'scheduler') and cluster.scheduler:
                    info = cluster.scheduler.identity()
                    if 'bokeh' in info['services']:
                        msg = "Dashboard started on port {}"
                        print(msg.format(info['services']['bokeh']))

        client = Client(cluster)
        # scatter the entityset
        # denote future with leading underscore
        start = time.time()
        es_token = "EntitySet-{}".format(tokenize(entityset))
        if es_token in client.list_datasets():
            print("Using EntitySet persisted on the cluster as dataset %s" %
                  (es_token))
            _es = client.get_dataset(es_token)
        else:
            _es = client.scatter([entityset])[0]
            client.publish_dataset(**{_es.key: _es})

        # save features to a tempfile and scatter it
        pickled_feats = cloudpickle.dumps(features)
        _saved_features = client.scatter(pickled_feats)
        client.replicate([_es, _saved_features])
        end = time.time()
        scatter_time = end - start
        scatter_string = "EntitySet scattered to workers in {:.3f} seconds"
        print(scatter_string.format(scatter_time))

        # map chunks
        # TODO: consider handling task submission dask kwargs
        _chunks = client.map(calculate_chunk,
                             chunks,
                             features=_saved_features,
                             entityset=_es,
                             approximate=approximate,
                             training_window=training_window,
                             profile=False,
                             verbose=False,
                             save_progress=save_progress,
                             no_unapproximated_aggs=no_unapproximated_aggs,
                             cutoff_df_time_var=cutoff_df_time_var,
                             target_time=target_time,
                             pass_columns=pass_columns)

        feature_matrix = []
        iterator = as_completed(_chunks).batches()
        if verbose:
            pbar_str = ("Elapsed: {elapsed} | Remaining: {remaining} | "
                        "Progress: {l_bar}{bar}| "
                        "Calculated: {n}/{total} chunks")
            pbar = make_tqdm_iterator(total=len(_chunks), bar_format=pbar_str)
        for batch in iterator:
            results = client.gather(batch)
            for result in results:
                feature_matrix.append(result)
                if verbose:
                    pbar.update()
        if verbose:
            pbar.close()
    except Exception:
        raise
    finally:
        if 'cluster' not in dask_kwargs and cluster is not None:
            cluster.close()
        if client is not None:
            client.close()

    return feature_matrix
Example #37
 def end(self):
     for future in distributed.as_completed(self.futures.copy()):
         self._process_future(future)
Example #38
 def as_completed(futures):
     return distributed.as_completed(futures)