Пример #1
0
    def run_bootstrap(self, bootstrap_idx):
        """
        Run the MI/CLR step and then BBSR on a single bootstrap of every task.

        :param bootstrap_idx: Position of the bootstrap to use within each task's bootstrap list
        :return: Two lists with one entry per task, holding the first and the second
            output of ``BBSR(...).run()`` respectively
        """
        task_betas = []
        task_betas_rescaled = []

        for task_id in range(self._n_tasks):
            # The same resampling is applied to both the design and the response of this task
            resample = self._task_bootstraps[task_id][bootstrap_idx]
            X = self._task_design[task_id].get_bootstrap(resample)
            Y = self._task_response[task_id].get_bootstrap(resample)

            # Align the priors to targets (rows) and regulators (columns); gaps are filled with 0
            aligned = self._task_priors[task_id].reindex(labels=self._targets, axis=0)
            aligned = aligned.reindex(labels=self._regulators, axis=1)
            priors_data = aligned.fillna(value=0)

            # With clr_only set, substitute an all-zero prior carrying the same labels
            if self.clr_only:
                priors_data = pd.DataFrame(0, index=priors_data.index, columns=priors_data.columns)

            MPControl.sync_processes(pref="bbsr_pre")

            Debug.vprint('Calculating MI, Background MI, and CLR Matrix', level=0)
            # The MI output is not requested (return_mi=False), so the second value is discarded
            clr_matrix, _ = self.mi_driver().run(Y, X, return_mi=False)

            Debug.vprint('Calculating task {k} betas using BBSR'.format(k=task_id), level=0)
            beta, beta_rescaled = BBSR(
                X,
                Y,
                clr_matrix,
                priors_data,
                prior_weight=self.prior_weight,
                no_prior_weight=self.no_prior_weight,
                nS=self.bsr_feature_num,
            ).run()

            task_betas.append(beta)
            task_betas_rescaled.append(beta_rescaled)

        return task_betas, task_betas_rescaled
Пример #2
0
def context_likelihood_mi(x,
                          y,
                          bins=DEFAULT_NUM_BINS,
                          logtype=DEFAULT_LOG_TYPE,
                          return_mi=True):
    """
    Compute the Context Likelihood of Relatedness (CLR) and Mutual Information (MI) between two data sets
    whose sample names align. The MI of y against itself serves as the background for the x-versus-y MI.

    NOTE(review): earlier documentation states that y is cast to dense if it is sparse and that x may stay
    sparse without an internal copy; that happens (if at all) inside the MI routine and is not visible here.

    :param x: Data object exposing ``shape``, ``sample_names``, ``gene_names`` and ``expression_data``
    :type x: InferelatorData [N x G]
    :param y: Data object with the same attributes, sharing sample names with x
    :type y: InferelatorData [N x K]
    :param bins: Number of bins passed to the MI calculation (an integer or None)
    :type bins: int
    :param logtype: Logarithm function passed to the MI calculation
    :type logtype: np.log func
    :param return_mi: If True, return the MI alongside the CLR; otherwise None takes its place
    :type return_mi: bool
    :return clr, mi: CLR and MI, labelled by the features of x (rows) and of y (columns).
        The second element is None if return_mi is False.
    :rtype pd.DataFrame, pd.DataFrame:
    """

    assert check.argument_integer(bins, allow_none=True)
    assert min(x.shape) > 0
    assert min(y.shape) > 0
    assert check.indexes_align((x.sample_names, y.sample_names))

    # Output labels: features of x index the rows, features of y index the columns
    row_labels, col_labels = x.gene_names, y.gene_names

    # Foreground MI between x and y [G x K]
    mi_foreground = mutual_information(x.expression_data, y.expression_data, bins, logtype=logtype)
    # Presumably zeroes entries where a row label matches a column label - confirm in array_set_diag
    array_set_diag(mi_foreground, 0., row_labels, col_labels)

    # Background MI of y against itself [K x K]
    mi_background = mutual_information(y.expression_data, y.expression_data, bins, logtype=logtype)
    array_set_diag(mi_background, 0., col_labels, col_labels)

    clr_values = calc_mixed_clr(mi_foreground, mi_background)

    MPControl.sync_processes(pref=SYNC_CLR_KEY)

    # Attach labels to the raw arrays
    mi_frame = pd.DataFrame(mi_foreground, index=row_labels, columns=col_labels)
    clr_frame = pd.DataFrame(clr_values, index=row_labels, columns=col_labels)

    if return_mi:
        return clr_frame, mi_frame
    return clr_frame, None
Пример #3
0
 def run_bootstrap(self, bootstrap):
     """
     Fit the elastic net regression on one bootstrap of the design and response data.

     :param bootstrap: Bootstrap selection handed to ``get_bootstrap`` of both data objects
     :return: Whatever ``ElasticNet(...).run()`` returns
     """
     design_sample = self.design.get_bootstrap(bootstrap)
     response_sample = self.response.get_bootstrap(bootstrap)

     utils.Debug.vprint('Calculating betas using MEN', level=0)
     MPControl.sync_processes("pre-bootstrap")

     regression = ElasticNet(design_sample,
                             response_sample,
                             self.random_seed,
                             parameters=self.elastic_net_parameters)
     return regression.run()
    def run_bootstrap(self, bootstrap_idx):
        """
        Fit an elastic net regression on a single bootstrap of every task.

        :param bootstrap_idx: Position of the bootstrap to use within each task's bootstrap list
        :return: Two lists with one entry per task, holding the first and the second
            output of ``ElasticNet(...).run()`` respectively
        """
        task_betas = []
        task_betas_rescaled = []

        for task_id in range(self._n_tasks):
            # Column positions making up this bootstrap; shared by design and response
            sample_cols = self._task_bootstraps[task_id][bootstrap_idx]

            # Resample the columns first, then restrict rows to the regulators / targets
            design_sample = self._task_design[task_id].iloc[:, sample_cols]
            X = design_sample.loc[self._regulators, :]
            response_sample = self._task_response[task_id].iloc[:, sample_cols]
            Y = response_sample.loc[self._targets, :]

            MPControl.sync_processes(pref="en_pre")

            utils.Debug.vprint('Calculating task {k} betas using MEN'.format(k=task_id), level=0)
            beta, beta_rescaled = ElasticNet(X, Y, random_seed=self.random_seed).run()

            task_betas.append(beta)
            task_betas_rescaled.append(beta_rescaled)

        return task_betas, task_betas_rescaled
Пример #5
0
    def run(self):
        """
        Run the regression and, on the master process only, assemble its results.

        :return: The output of ``self.pileup_data`` when ``MPControl.is_master`` is truthy,
            otherwise the tuple ``(None, None)``
        """

        regression_output = self.regress()

        # Only the master assembles results; every other process reports a pair of Nones
        result = self.pileup_data(regression_output) if MPControl.is_master else (None, None)

        MPControl.sync_processes("post_pileup")
        return result
Пример #6
0
    def run_regression(self):
        """
        Run the regression once per bootstrap and collect the results on the master process.

        :return: Two lists (first and second output of ``run_bootstrap``), one entry per bootstrap.
            Both lists stay empty when ``self.is_master()`` is falsy.
        """
        collected_betas, collected_rescaled = [], []

        MPControl.sync_processes("pre_regression")

        for position, bootstrap in enumerate(self.get_bootstraps()):
            progress = 'Bootstrap {} of {}'.format((position + 1), self.num_bootstraps)
            Debug.vprint(progress, level=0)

            # Reseed per bootstrap, offsetting the base seed by the bootstrap position
            np.random.seed(self.random_seed + position)

            boot_betas, boot_rescaled = self.run_bootstrap(bootstrap)

            # Results are only kept where is_master() says so
            if self.is_master():
                collected_betas.append(boot_betas)
                collected_rescaled.append(boot_rescaled)

            MPControl.sync_processes("post_bootstrap")

        return collected_betas, collected_rescaled
Пример #7
0
    def run_bootstrap(self, bootstrap_idx):
        """
        Collect one bootstrap from every task and run the AMuSR regression on all of them together.

        :param bootstrap_idx: Position of the bootstrap to use within each task's bootstrap list
        :return: Whatever ``AMuSR_regression(...).run()`` returns
        """
        design_by_task = []
        response_by_task = []

        for task_id in range(self.n_tasks):
            # Column positions making up this bootstrap; shared by design and response
            sample_cols = self.task_bootstraps[task_id][bootstrap_idx]

            # Resample the columns, then transpose each per-task frame
            design_by_task.append(self.task_design[task_id].iloc[:, sample_cols].transpose())
            response_by_task.append(self.task_response[task_id].iloc[:, sample_cols].transpose())

        MPControl.sync_processes(pref="amusr_pre")

        return AMuSR_regression(
            design_by_task,
            response_by_task,
            tfs=self.regulators,
            genes=self.targets,
            priors=self.priors_data,
            prior_weight=self.prior_weight,
        ).run()
Пример #8
0
    def run_bootstrap(self, bootstrap_idx):
        """
        Run the MI/CLR step and then BBSR on a single bootstrap of every task.

        :param bootstrap_idx: Position of the bootstrap to use within each task's bootstrap list
        :return: Two lists with one entry per task, holding the first and the second
            output of ``BBSR(...).run()`` respectively
        """
        task_betas = []
        task_betas_rescaled = []

        for task_id in range(self._n_tasks):
            # Column positions making up this bootstrap; shared by design and response
            sample_cols = self._task_bootstraps[task_id][bootstrap_idx]

            # Resample the columns first, then restrict rows to the regulators / targets
            X = self._task_design[task_id].iloc[:, sample_cols].loc[self._regulators, :]
            Y = self._task_response[task_id].iloc[:, sample_cols].loc[self._targets, :]

            # Align the priors to targets (rows), then regulators (columns); gaps are filled with 0
            row_aligned = self._task_priors[task_id].reindex(labels=self._targets, axis=0).fillna(value=0)
            priors_data = row_aligned.reindex(labels=self._regulators, axis=1).fillna(value=0)

            MPControl.sync_processes(pref="bbsr_pre")

            utils.Debug.vprint('Calculating MI, Background MI, and CLR Matrix', level=0)
            clr_matrix, mi_matrix = self.mi_driver(sync_in_tmp_path=self.mi_sync_path).run(X, Y)
            # The MI output is not used any further; release the reference right away
            del mi_matrix

            utils.Debug.vprint('Calculating task {k} betas using BBSR'.format(k=task_id), level=0)
            beta, beta_rescaled = BBSR(
                X,
                Y,
                clr_matrix,
                priors_data,
                prior_weight=self.prior_weight,
                no_prior_weight=self.no_prior_weight,
                nS=self.bsr_feature_num,
            ).run()

            task_betas.append(beta)
            task_betas_rescaled.append(beta_rescaled)

        return task_betas, task_betas_rescaled
Пример #9
0
    def run(self, x_df, y_df, bins=None, logtype=DEFAULT_LOG_TYPE):
        """
        Calculate the CLR and MI for two data sets whose columns align.

        :param x_df: pd.DataFrame
            Also used against itself to compute the background MI
        :param y_df: pd.DataFrame
        :param bins: int
            Number of bins for discretizing continuous variables.
            When not None it is stored on ``self.bins`` and therefore persists for later calls.
        :param logtype: np.log func
        :return clr, mi: pd.DataFrame, pd.DataFrame
            The CLR result and the MI of y_df against x_df as returned by ``mutual_information``
        """

        assert check.argument_integer(bins, allow_none=True)
        assert check.indexes_align((x_df.columns, y_df.columns))
        assert x_df.shape[0] > 0 and x_df.shape[1] > 0
        assert y_df.shape[0] > 0 and y_df.shape[1] > 0

        # An explicit bin count overrides (and replaces) the one stored on the instance
        if bins is not None:
            self.bins = bins

        # Foreground MI: y against x
        mi = mutual_information(y_df, x_df, self.bins, temp_dir=self.temp_dir, logtype=logtype)

        # Background MI: x against itself
        mi_background = mutual_information(x_df, x_df, self.bins, temp_dir=self.temp_dir, logtype=logtype)

        # Both matrices go through df_set_diag(..., 0) before the CLR calculation
        foreground_for_clr = utils.df_set_diag(mi, 0)
        background_for_clr = utils.df_set_diag(mi_background, 0)
        clr = calc_mixed_clr(foreground_for_clr, background_for_clr)

        MPControl.sync_processes(pref=SYNC_CLR_KEY)

        return clr, mi
Пример #10
0
 def run_bootstrap(self, bootstrap):
     """
     Fit the elastic net regression on one bootstrap of the design and response data.

     :param bootstrap: Column positions selecting the bootstrap sample from both frames
     :return: Whatever ``ElasticNet(...).run()`` returns
     """
     design_sample = self.design.iloc[:, bootstrap]
     response_sample = self.response.iloc[:, bootstrap]

     utils.Debug.vprint('Calculating betas using MEN', level=0)
     MPControl.sync_processes("pre-bootstrap")

     regression = ElasticNet(design_sample, response_sample)
     return regression.run()
Пример #11
0
 def test_sync(self):
     # Synchronizing is expected to fail with RuntimeError in this test's configuration
     self.assertRaises(RuntimeError, MPControl.sync_processes)
Пример #12
0
 def test_dask_cluster_sync(self):
     # sync_processes is expected to return a truthy value here
     sync_result = MPControl.sync_processes()
     self.assertTrue(sync_result)
Пример #13
0
 def test_dask_local_sync(self):
     # sync_processes is expected to return a truthy value here
     sync_result = MPControl.sync_processes()
     self.assertTrue(sync_result)
Пример #14
0
 def test_mp_sync(self):
     # sync_processes is expected to return a truthy value here
     sync_result = MPControl.sync_processes()
     self.assertTrue(sync_result)
Пример #15
0
 def test_kvs_sync(self):
     # sync_processes is expected to return a value equal to None here
     sync_result = MPControl.sync_processes()
     self.assertEqual(sync_result, None)