Example #1
    def wait(self, futures):
        if self.in_worker:
            secede()
        results = self.client.gather(list(futures))
        if self.in_worker:
            rejoin()
        return [r.get() for r in results]
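
Example #1 only secedes when it is already running inside a worker. A minimal sketch of how such an in_worker flag can be derived, assuming the standard dask.distributed API (the helper name is illustrative):

from distributed import get_worker

def running_in_worker() -> bool:
    # get_worker() raises ValueError when called outside a Dask worker task.
    try:
        get_worker()
        return True
    except ValueError:
        return False
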
Example #2
    def generateCoeffs(coeffs):
        c = get_client()
        futures = [c.submit(coeff.generate) for coeff in coeffs]

        secede()
        c.gather(futures)
        rejoin()

        return coeffs
Example #3
File: _dask.py  Project: Aathi410/Pro123
    def retrieval_context(self):
        """Override ParallelBackendBase.retrieval_context to avoid deadlocks.

        This removes the thread from the worker's thread pool (using 'secede').
        Seceding avoids deadlock in nested parallelism settings.
        """
        # See 'joblib.Parallel.__call__' and 'joblib.Parallel.retrieve' for how
        # this is used.
        if hasattr(thread_state, 'execution_state'):
            # we are in a worker. Secede to avoid deadlock.
            secede()

        yield

        if hasattr(thread_state, 'execution_state'):
            rejoin()
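
The body above is a generator meant to be wrapped as a context manager. A self-contained sketch of the same secede-around-gather pattern, assuming it runs inside a task on a Dask worker (the helper names are illustrative, not joblib's):

from contextlib import contextmanager

from distributed import get_client, rejoin, secede
from distributed.worker import thread_state

@contextmanager
def seceded_retrieval():
    # Only secede when this code runs in a worker's task thread.
    in_worker = hasattr(thread_state, "execution_state")
    if in_worker:
        secede()
    try:
        yield
    finally:
        if in_worker:
            rejoin()

def gather_from_task(futures):
    client = get_client()
    with seceded_retrieval():
        return client.gather(futures)
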
Example #4
    def run_experiment(self):
        """
        Run the experiment, including all scenarios and replications.
        """
        dask_client = get_client(timeout=600)

        # print("dask client: {}".format(dask_client))
        # try:
        secede()
        print("secede: {}".format(self.name))
        #     seceded = True
        # except ValueError:
        #     seceded = False

        futures = []
        total_runs = len(self.scenarios) * self.num_replications
        for scenario in self.scenarios:
            scenario_name, configuration = scenario

            # replications_done = self.find_replications_done(scenario_name, configuration)
            replications_done = 0

            # random number of steps for this scenario
            total_steps = random.randint(300, 700)

            for run_id in range(replications_done, self.num_replications):
                future = dask_client.submit(run_model, self.name, scenario_name, run_id, total_runs, total_steps,
                                            self.scratch_path)
                futures.append(future)

        loggers_info = dask_client.gather(futures)

        # if seceded:
        print("rejoin: {}".format(self.name))
        rejoin()

        # gather all the output dbs into a single db
        out_db_filepath = Logger.gather_databases(self.name, loggers_info)

        dest_file_path_name = os.path.join(OUTPUT_DIR, os.path.split(out_db_filepath)[1])

        if not os.path.exists(dest_file_path_name):
            shutil.move(out_db_filepath, dest_file_path_name)

        return dest_file_path_name
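
The commented-out guard in Example #4 hints that secede() raises ValueError when the calling thread is not a worker task thread. A hedged sketch of that defensive pattern (the helper name is illustrative):

from distributed import secede

def try_secede() -> bool:
    # secede() raises ValueError if the current thread is not running inside
    # a Dask worker task, so failure simply means "not in a worker".
    try:
        secede()
        return True
    except ValueError:
        return False
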
Example #5
    def generate(self):
        def generateConstCoeff(c, dofs, order):
            # Generate the coefficient
            c.generate()

            tmp = c.components

            # Constant part of the coefficient?
            if len(c.derivs) == 0:
                return tmp

            # Contract the indices from the phi expansions
            for d in c.derivs:
                dphis = dPhis(dofs, d)
                a = tuple(range(self.order, self.order + d + 1))
                b = tuple(range(d + 1))

                tmp = np.tensordot(tmp, dphis, axes=(a, b))

            # Ignore zeros
            return tmp

        c = get_client()
        futures = [
            c.submit(generateConstCoeff, coeff, self.parametrization.dofs,
                     self.order) for coeff in self.constCoeffs
        ]

        secede()
        c.gather(futures)
        rejoin()

        # Add them together
        if len(futures) == 0:
            return
        self.components = futures[0].result()
        for i in range(1, len(futures)):
            self.components = self.components + futures[i].result()
Example #6
def run_test_with_timeout(
    test_config: TestConfig,
    incoming_state: dict,
    hostnames: List[str],
    duration: int = 15,
) -> dict:
    """
    Calls run_test with a timeout and signals run_test to end gracefully once the timeout has elapsed

    Args:
        test_config: Config of test to run
        incoming_state: Initial state to run actions/asserts in
        hostnames: List of runner hostnames
        duration: Optional timeout in seconds to run the test within (presumably to make it convenient to call from runners)

    Returns:
        New state after running actions and asserts
    """
    if duration is None or duration < 0:
        return run_test(test_config, incoming_state, hostnames)

    # NOTE: Use a dask cluster scheduler?
    client = get_client()

    # Used to prevent system deadlock since we are spawning 2 threads
    secede()

    # NOTE: may improve way of doing this
    timeout_signal_name = f"keep-going-{str(uuid.uuid4())}"
    keep_going = Variable(timeout_signal_name)
    keep_going.set(True)

    run_test_task: Future = client.submit(
        run_test,
        test_config=test_config,
        incoming_state=incoming_state,
        hostnames=hostnames,
        timeout_signal_name=timeout_signal_name,
    )

    LOGGER.debug("Test duration config: %d seconds", duration)

    def distributed_timeout():
        # If a timeout from a previous test did not complete, it will keep running (it cannot be canceled)
        # However, if it keeps running, it can end another test early
        # This means it needs to receive a signal to return
        end_time = datetime.now() + timedelta(seconds=duration)
        while datetime.now() <= end_time and keep_going.get():
            time.sleep(test_config.get("secondsBetweenCycles", 1))

    timeout_task: Future = client.submit(distributed_timeout)

    # Wait for either test or timeout to finish
    # Return test result if it finishes first
    # End test if timeout finishes first and return state
    start = datetime.now()
    wait([run_test_task, timeout_task], return_when="FIRST_COMPLETED")
    end = datetime.now()

    rejoin()
    LOGGER.debug("Test %s took %d seconds", test_config["name"], (end - start).seconds)

    if run_test_task.done():
        keep_going.set(False)
        return run_test_task.result()
    elif timeout_task.done():
        LOGGER.debug("test task: %s", run_test_task)
        LOGGER.debug("timeout task: %s", timeout_task)
        LOGGER.info("Test %s timed out", test_config["name"])
        # NOTE: add timed out to summary?
        keep_going.set(False)
        return run_test_task.result()
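
Example #6 coordinates the test task and the timeout task through a named distributed.Variable. A minimal, self-contained sketch of that signalling pattern, assuming a local threaded cluster; the task and flag names are hypothetical:

import time
import uuid

from distributed import Client, Variable

def polling_task(flag_name):
    # The submitted task sees the same named Variable as the submitting code.
    flag = Variable(flag_name)
    cycles = 0
    while flag.get():
        time.sleep(0.1)
        cycles += 1
    return cycles

if __name__ == "__main__":
    client = Client(processes=False)
    flag_name = f"keep-going-{uuid.uuid4()}"
    Variable(flag_name).set(True)
    task = client.submit(polling_task, flag_name)
    time.sleep(1)
    Variable(flag_name).set(False)  # signal the task to finish gracefully
    print(task.result())
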
Example #7
def coclustering(Z,
                 nclusters_row,
                 nclusters_col,
                 errobj,
                 niters,
                 epsilon,
                 col_clusters_init=None,
                 row_clusters_init=None,
                 run_on_worker=False):
    """
    Run the co-clustering, Dask implementation

    :param Z: m x n data matrix
    :param nclusters_row: number of row clusters
    :param nclusters_col: number of column clusters
    :param errobj: convergence threshold for the objective function
    :param niters: maximum number of iterations
    :param epsilon: numerical parameter, avoids zero arguments in log
    :param row_clusters_init: initial row cluster assignment
    :param col_clusters_init: initial column cluster assignment
    :param run_on_worker: whether the function is submitted to a Dask worker
    :return: whether it has converged, number of iterations performed, final
    row and column clustering, and the error value
    """
    client = get_client()

    Z = da.array(Z) if not isinstance(Z, da.Array) else Z

    [m, n] = Z.shape
    row_chunks, col_chunks = Z.chunksize

    row_clusters = da.array(row_clusters_init) \
        if row_clusters_init is not None \
        else _initialize_clusters(m, nclusters_row, chunks=row_chunks)
    col_clusters = da.array(col_clusters_init) \
        if col_clusters_init is not None \
        else _initialize_clusters(n, nclusters_col, chunks=col_chunks)
    R = _setup_cluster_matrix(nclusters_row, row_clusters)
    C = _setup_cluster_matrix(nclusters_col, col_clusters)

    e, old_e = 2 * errobj, 0
    s = 0
    converged = False

    Gavg = Z.mean()

    while not converged and s < niters:
        logger.debug(f'Iteration # {s} ..')
        # Calculate cluster based averages
        # nel_clusters is a matrix with the number of elements per co-cluster
        # originally computed as:  da.dot(da.dot(R.T, da.ones((m, n))), C)
        nel_row_clusters = da.bincount(row_clusters, minlength=nclusters_row)
        nel_col_clusters = da.bincount(col_clusters, minlength=nclusters_col)
        logger.debug('num of populated clusters: row {}, col {}'.format(
            da.sum(nel_row_clusters > 0).compute(),
            da.sum(nel_col_clusters > 0).compute()))
        nel_clusters = da.outer(nel_row_clusters, nel_col_clusters)
        CoCavg = (da.matmul(da.matmul(R.T, Z), C) + Gavg * epsilon) / \
                 (nel_clusters + epsilon)

        # Calculate distance based on row approximation
        d_row = _distance(Z, da.matmul(C, CoCavg.T), epsilon)
        # Assign to best row cluster
        row_clusters = da.argmin(d_row, axis=1)
        R = _setup_cluster_matrix(nclusters_row, row_clusters)

        # Calculate distance based on column approximation
        d_col = _distance(Z.T, da.matmul(R, CoCavg), epsilon)
        # Assign to best column cluster
        col_clusters = da.argmin(d_col, axis=1)
        C = _setup_cluster_matrix(nclusters_col, col_clusters)

        # Error value (actually just the column components really)
        old_e = e
        minvals = da.min(d_col, axis=1)
        # power 1 divergence, power 2 euclidean
        e = da.sum(da.power(minvals, 1))
        row_clusters, R, col_clusters, C, e = client.persist(
            [row_clusters, R, col_clusters, C, e])
        if run_on_worker:
            # this is workaround for e.compute() for a function that runs
            # on a worker with multiple threads
            # https://github.com/dask/distributed/issues/3827
            e = client.compute(e)
            secede()
            e = e.result()
            rejoin()
        else:
            e = e.compute()
        logger.debug(f'Error = {e:+.15e}, dE = {e - old_e:+.15e}')
        converged = abs(e - old_e) < errobj
        s = s + 1
    if converged:
        logger.debug(f'Coclustering converged in {s} iterations')
    else:
        logger.debug(f'Coclustering not converged in {s} iterations')
    return converged, s, row_clusters, col_clusters, e
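
The run_on_worker branch above works around dask/distributed issue #3827: calling .compute() from inside a worker task can deadlock, so the computation is submitted through the client and the task secedes while waiting. A minimal sketch of that workaround in isolation, assuming a local threaded cluster:

import dask.array as da
from distributed import Client, get_client, rejoin, secede

def task_that_computes():
    client = get_client()            # client of the worker running this task
    x = da.ones((1000, 1000), chunks=(100, 100)).sum()
    fut = client.compute(x)          # schedule instead of calling x.compute()
    secede()                         # free this worker thread while waiting
    result = fut.result()
    rejoin()                         # re-enter the worker's thread pool
    return result

if __name__ == "__main__":
    client = Client(processes=False)
    print(client.submit(task_that_computes).result())
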
Example #8
File: bootstrap.py  Project: chrhck/pyABC
def calc_cv_per_model(nr_particles, model_weights, N_BOOTSTR, test_w,
                      transitions, test_X, client):
    """
    Calculate the Coefficient of Variation.

    Parameters
    ----------

    nr_particles: int
        Number of particles to estimate the CV for

    model_weights: np.ndarray
        array of model weights

    N_BOOTSTR: int
        Nr of bootstrapped KDEs to take to estimate the CV

    test_w: List[np.ndarray]
        test_w[m] are the weights of the test points test_X[m] of model m

    transitions: List[Transition]
        List of transitions

    test_X: List[np.ndarray]
        test_X[m] are the test points with weights test_w[m]

    client: Client
        Dask client to execute on

    Returns
    -------

    cv, variations_at_X: float, List[np.ndarray]
        * cv is the mean variation
        * variations_at_X are the variations at the test_X

    """
    test_transitions = copy.deepcopy(transitions)

    # how many particles to draw for each model
    n_per_model = np.random.multinomial(nr_particles, model_weights)

    # N_BOOTSTR times, train test_transitions on n_per_model points, and
    # calculate the weights associated with test_X, for each model

    logger.debug("Start CV")
    futures = []
    for _ in range(N_BOOTSTR):
        futures.append(
            weights(n_per_model, transitions, test_transitions, test_X))

    logger.debug("Gathering futures")
    secede()
    chunked = client.gather(futures)
    rejoin()
    bootstr_w_at_test_X = [
        np.concatenate(chunk) for bs in chunked for chunk in bs
    ]
    del chunked
    per_model_w = [np.asarray(arr) for arr in zip(*bootstr_w_at_test_X)]

    # calculate the cv of the bootstrapped weights for each model
    variations_at_X = [st.variation(ws, axis=0) for ws in per_model_w]

    # normalize by number of samples per model
    model_weighted_variations_at_X = [
        var * n / n_per_model.sum()
        for var, n in zip(variations_at_X, n_per_model)
    ]

    # weight cvs by the point weights
    point_weighted_var_at_X = [
        var * w for var, w in zip(model_weighted_variations_at_X, test_w)
    ]

    # compute an "average coefficient of variation":
    # for each model, sum up the weighted cvs over the test points
    # then, take the sum over all models
    cv = sum(var.sum() for var in point_weighted_var_at_X)

    logger.debug("CV done")
    return float(cv), variations_at_X
Example #9
def _rejoin_pool_dask():
    from dask.distributed import rejoin
    rejoin()