# Exemplo n.º 1 (scraped snippet separator)
    def regress(self):
        """
        Execute StARS

        :return: list
            Returns a list of regression results that base_regression's pileup_data can process
        """

        # Dask has a dedicated path that ships the whole job to the cluster
        if MPControl.is_dask():
            from inferelator.distributed.dask_functions import lasso_stars_regress_dask
            return lasso_stars_regress_dask(self.X, self.Y, self.alphas, self.num_subsamples, self.random_seed,
                                            self.method, self.params, self.G, self.genes)

        def regression_maker(gene_idx):
            # Emit a progress line; only every 100th gene is printed loudly
            verbosity = 0 if gene_idx % 100 == 0 else 2
            utils.Debug.allprint(base_regression.PROGRESS_STR.format(gn=self.genes[gene_idx],
                                                                     i=gene_idx,
                                                                     total=self.G),
                                 level=verbosity)

            # Scale the response vector for this gene and fit the StARS model
            response = utils.scale_vector(self.Y.get_gene_data(gene_idx, force_dense=True, flatten=True))
            result = stars_model_select(self.X.values,
                                        response,
                                        self.alphas,
                                        method=self.method,
                                        num_subsamples=self.num_subsamples,
                                        random_seed=self.random_seed,
                                        **self.params)
            result['ind'] = gene_idx
            return result

        return MPControl.map(regression_maker, range(self.G), tell_children=False)
    def regress(self):
        """
        Execute Elastic Net

        :return: list
            Returns a list of regression results that base_regression's pileup_data can process
        """

        # Dask has a dedicated path that ships the whole job to the cluster
        if MPControl.is_dask():
            from inferelator.distributed.dask_functions import sklearn_regress_dask
            return sklearn_regress_dask(self.X, self.Y, self.model, self.G,
                                        self.genes, self.min_coef)

        def regression_maker(gene_idx):
            # Emit a progress line; only every 100th gene is printed loudly
            verbosity = 0 if gene_idx % 100 == 0 else 2
            utils.Debug.allprint(base_regression.PROGRESS_STR.format(gn=self.genes[gene_idx],
                                                                     i=gene_idx,
                                                                     total=self.G),
                                 level=verbosity)

            # Scale the response vector and fit a fresh copy of the sklearn model
            response = utils.scale_vector(self.Y.get_gene_data(gene_idx,
                                                               force_dense=True,
                                                               flatten=True))
            result = sklearn_gene(self.X.values,
                                  response,
                                  copy.copy(self.model),
                                  min_coef=self.min_coef)
            result['ind'] = gene_idx
            return result

        return MPControl.map(regression_maker, range(self.G), tell_children=False)
# Exemplo n.º 3 (scraped snippet separator)
    def regress(self):
        """
        Execute BBSR

        :return: pd.DataFrame [G x K], pd.DataFrame [G x K]
            Returns the regression betas and beta error reductions for all threads if this is the master thread (rank 0)
            Returns None, None if it's a subordinate thread
        """

        # Dask has a dedicated path that ships the whole job to the cluster
        if MPControl.is_dask():
            from inferelator.distributed.dask_functions import bbsr_regress_dask
            return bbsr_regress_dask(self.X, self.Y, self.pp, self.weights_mat,
                                     self.G, self.genes, self.nS)

        def regression_maker(gene_idx):
            # Emit a progress line; only every 100th gene is printed loudly
            verbosity = 0 if gene_idx % 100 == 0 else 2
            utils.Debug.allprint(base_regression.PROGRESS_STR.format(gn=self.genes[gene_idx],
                                                                     i=gene_idx,
                                                                     total=self.G),
                                 level=verbosity)

            # Pull the gene-specific priors and weights rows as flat vectors
            prior_row = self.pp.iloc[gene_idx, :].values.flatten()
            weight_row = self.weights_mat.iloc[gene_idx, :].values.flatten()
            response = utils.scale_vector(self.Y.get_gene_data(gene_idx,
                                                               force_dense=True,
                                                               flatten=True))

            result = bayes_stats.bbsr(self.X.values,
                                      response,
                                      prior_row,
                                      weight_row,
                                      self.nS,
                                      ordinary_least_squares=self.ols_only)
            result['ind'] = gene_idx
            return result

        return MPControl.map(regression_maker, range(self.G), tell_children=False)
# Exemplo n.º 4 (scraped snippet separator)
    def regress(self):
        """
        Execute Elastic Net

        :return: list
            Returns a list of regression results that base_regression's pileup_data can process
        """

        # Dask has a dedicated path that ships the whole job to the cluster
        if MPControl.client.name() == "dask":
            from inferelator.distributed.dask_functions import elasticnet_regress_dask
            return elasticnet_regress_dask(self.X, self.Y, self.params, self.G,
                                           self.genes)

        def regression_maker(gene_idx):
            # Emit a progress line; only every 100th gene is printed loudly
            verbosity = 0 if gene_idx % 100 == 0 else 2
            utils.Debug.allprint(base_regression.PROGRESS_STR.format(gn=self.genes[gene_idx],
                                                                     i=gene_idx,
                                                                     total=self.G),
                                 level=verbosity)

            result = elastic_net(self.X.values,
                                 self.Y.iloc[gene_idx, :].values,
                                 self.params)
            result['ind'] = gene_idx
            return result

        return MPControl.map(regression_maker, range(self.G), tell_children=False)
def _sim_ints(prob_dist, n_per_row, sparse=False, random_seed=42):
    """
    Simulate integer count rows by drawing column indices from prob_dist
    and counting how often each column is hit.

    :param prob_dist: Per-column probabilities; must sum to 1
    :param n_per_row: Number of draws for each simulated row
    :param sparse: Return scipy CSR output instead of dense, defaults to False
    :param random_seed: Seed for the SeedSequence that spawns worker seeds
    :return: [len(n_per_row) x len(prob_dist)] count matrix
    """

    if not np.isclose(np.sum(prob_dist), 1.):
        raise ValueError("Probability distribution does not sum to 1")

    ncols = len(prob_dist)

    def _sim_rows(n_vec, seed):
        # Each worker gets its own generator so results are reproducible
        rng = np.random.default_rng(seed=seed)
        col_ids = np.arange(ncols)

        counts = np.zeros((len(n_vec), ncols), dtype=np.int32)
        for row_idx, n_draws in enumerate(n_vec):
            draws = rng.choice(col_ids, size=n_draws, p=prob_dist)
            counts[row_idx, :] = np.bincount(draws, minlength=ncols)

        if sparse:
            return _sparse.csr_matrix(counts)
        return counts

    seed_seq = np.random.SeedSequence(random_seed)
    chunks = MPControl.map(_sim_rows, _row_gen(n_per_row), _ss_gen(seed_seq))

    # Stack the per-chunk results back into one matrix
    if sparse:
        return _sparse.vstack(chunks)
    return np.vstack(chunks)
def _sim_float(gene_centers, gene_sds, nrows, random_seed=42):
    """
    Simulate float data as normal draws around per-gene centers.

    :param gene_centers: Per-column means
    :param gene_sds: Per-column standard deviations (same length as centers)
    :param nrows: Number of rows to simulate
    :param random_seed: Seed for the SeedSequence that spawns worker seeds
    :return: [nrows x len(gene_centers)] array
    """

    ncols = len(gene_centers)
    assert ncols == len(gene_sds)

    def _sim_cols(cents, sds, seed):
        # One generator per worker chunk keeps draws reproducible
        rng = np.random.default_rng(seed=seed)
        return rng.normal(loc=cents, scale=sds, size=(nrows, len(cents)))

    seed_seq = np.random.SeedSequence(random_seed)
    chunks = MPControl.map(_sim_cols,
                           _col_gen(gene_centers),
                           _col_gen(gene_sds),
                           _ss_gen(seed_seq))

    return np.hstack(chunks)
# Exemplo n.º 7 (scraped snippet separator)
    def regress(self):
        """
        Execute multitask (AMUSR)

        :return: list
            Returns a list of regression results that the amusr_regression pileup_data can process
        """

        # Dask has a dedicated path that ships the whole job to the cluster
        if MPControl.client.name() == "dask":
            from inferelator.distributed.dask_functions import amusr_regress_dask
            return amusr_regress_dask(
                self.X,
                self.Y,
                self.priors,
                self.prior_weight,
                self.n_tasks,
                self.genes,
                self.tfs,
                self.G,
                remove_autoregulation=self.remove_autoregulation)

        def regression_maker(gene_idx):
            # Emit a progress line; only every 100th gene is printed loudly
            verbosity = 0 if gene_idx % 100 == 0 else 2
            utils.Debug.allprint(base_regression.PROGRESS_STR.format(gn=self.genes[gene_idx],
                                                                     i=gene_idx,
                                                                     total=self.G),
                                 level=verbosity)

            gene = self.genes[gene_idx]

            # Optionally drop the gene itself from the regressor set
            if self.remove_autoregulation:
                tfs = [t for t in self.tfs if t != gene]
            else:
                tfs = self.tfs

            # Collect design/response pairs for each task that has this gene
            x, y, tasks = [], [], []
            for task_idx in range(self.n_tasks):
                if gene in self.Y[task_idx]:
                    x.append(self.X[task_idx].loc[:, tfs].values)  # list([N, K])
                    y.append(self.Y[task_idx].loc[:, gene].values.reshape(-1, 1))  # list([N, 1])
                    tasks.append(task_idx)  # [T,]

            prior = format_prior(self.priors, gene, tasks, self.prior_weight)
            return run_regression_EBIC(x, y, tfs, tasks, gene, prior)

        return MPControl.map(regression_maker, range(self.G))
# Exemplo n.º 8 (scraped snippet separator)
def build_mi_array(X, Y, bins, logtype=DEFAULT_LOG_TYPE, temp_dir=None):
    """
    Calculate MI into an array

    :param X: np.ndarray (n x m1)
        Discrete array of bins
    :param Y: np.ndarray (n x m2)
        Discrete array of bins
    :param bins: int
        The total number of bins that were used to make the arrays discrete
    :param logtype: np.log func
        Which log function to use (log2 gives bits, ln gives nats)
    :param temp_dir: path
        Path to write temp files for multiprocessing
    :return mi: np.ndarray (m1 x m2)
        Returns the mutual information array
    """

    m1, m2 = X.shape[1], Y.shape[1]

    # MI of X column i against every column of Y
    def mi_make(i):
        level = 2 if i % 1000 == 0 else 3
        Debug.allprint("Mutual Information Calculation [{i} / {total}]".format(i=i, total=m1),
                       level=level)

        # Densify the column if X is a scipy sparse matrix
        if sps.isspmatrix(X):
            column = X[:, i].A.flatten()
        else:
            column = X[:, i].flatten()

        discrete_X = _make_discrete(column, bins)
        return [_calc_mi(_make_table(discrete_X, Y[:, j], bins), logtype=logtype)
                for j in range(m2)]

    # Fan the per-column MI calculation out to the multiprocessing controller
    mi = np.array(MPControl.map(mi_make, range(m1), tmp_file_path=temp_dir))

    # Sanity-check the assembled array shape
    assert (m1, m2) == mi.shape, "Array {sh} produced [({m1}, {m2}) expected]".format(
        sh=mi.shape, m1=m1, m2=m2)

    return mi
    def regress(self, regression_function=None):
        """
        Execute multitask (AMUSR)

        :param regression_function: Callable regression routine; when None the
            instance's self.regression_function is used
        :return: list
            Returns a list of regression results that the amusr_regression pileup_data can process
        """

        # Fall back to the regression routine configured on the instance
        regression_function = self.regression_function if regression_function is None else regression_function

        # Dask gets a dedicated code path that distributes the whole job
        if MPControl.is_dask():
            from inferelator.distributed.dask_functions import amusr_regress_dask
            return amusr_regress_dask(
                self.X,
                self.Y,
                self.priors,
                self.prior_weight,
                self.n_tasks,
                self.genes,
                self.tfs,
                self.G,
                remove_autoregulation=self.remove_autoregulation,
                regression_function=regression_function,
                tol=self.tol,
                rel_tol=self.rel_tol,
                use_numba=self.use_numba)

        # Worker function: regress one gene (index j) across all tasks
        def regression_maker(j):
            # Only every 100th gene is printed at the loud verbosity level
            level = 0 if j % 100 == 0 else 2
            utils.Debug.allprint(base_regression.PROGRESS_STR.format(
                gn=self.genes[j], i=j, total=self.G),
                                 level=level)

            gene = self.genes[j]
            x, y, tasks = [], [], []

            # Optionally exclude the gene itself from its own regressor set
            if self.remove_autoregulation:
                tfs = [t for t in self.tfs if t != gene]
            else:
                tfs = self.tfs

            # Gather design (x) and response (y) for each task containing this gene
            for k in range(self.n_tasks):
                if gene in self.Y[k].gene_names:
                    x.append(self.X[k].get_gene_data(tfs))  # list([N, K])
                    y.append(self.Y[k].get_gene_data(
                        gene, force_dense=True).reshape(-1, 1))  # list([N, 1])
                    tasks.append(k)  # [T,]

            # Build the task-specific prior matrix for this gene
            prior = format_prior(self.priors,
                                 gene,
                                 tasks,
                                 self.prior_weight,
                                 tfs=tfs)
            # Pass regularization path / tolerance settings straight through
            return regression_function(x,
                                       y,
                                       tfs,
                                       tasks,
                                       gene,
                                       prior,
                                       Cs=self.Cs,
                                       Ss=self.Ss,
                                       lambda_Bs=self.lambda_Bs,
                                       lambda_Ss=self.lambda_Ss,
                                       tol=self.tol,
                                       rel_tol=self.rel_tol,
                                       use_numba=self.use_numba)

        return MPControl.map(regression_maker, range(self.G))
# Exemplo n.º 10 (scraped snippet separator)
 def test_local_map(self):
     """Map through the local engine and verify the round-trip result."""
     self.assertListEqual(MPControl.map(math_function, *self.map_test_data),
                          self.map_test_expect)
# Exemplo n.º 11 (scraped snippet separator)
 def test_map(self):
     """Mapping must raise RuntimeError when no client has been started."""
     with self.assertRaises(RuntimeError):
         MPControl.map(math_function, *self.map_test_data)
# Exemplo n.º 12 (scraped snippet separator)
 def test_dask_cluster_map(self):
     """Mapping on the dask-cluster client must raise NotImplementedError."""
     with self.assertRaises(NotImplementedError):
         MPControl.map(math_function, *self.map_test_data)
# Exemplo n.º 13 (scraped snippet separator)
 def test_kvs_map_by_file(self):
     """Map with file-backed result passing and verify the output."""
     result = MPControl.map(math_function,
                            *self.map_test_data,
                            tell_children=True,
                            tmp_file_path=self.temp_dir)
     self.assertListEqual(result, self.map_test_expect)
# Exemplo n.º 14 (scraped snippet separator)
 def test_kvs_map_distribute(self):
     """Map with in-memory distribution to children and verify the output."""
     result = MPControl.map(math_function, *self.map_test_data, tell_children=True)
     self.assertListEqual(result, self.map_test_expect)