Exemplo n.º 1
0
def main():
    """Print the result of ``optimal_davies_pvalue`` for a fixed example input."""
    # Positional arguments, in the order the call expects them.
    example_args = (
        [1.5, 3.0],  # q
        -0.5,  # mu
        1.0,  # var
        3.0,  # kur
        [10.0, 0.2, 0.1, 0.3],  # w
        0.5,  # remain_var
        3.4,  # df
        [5.1, 0.2],  # trho
        [0.0, 0.01],  # grid
    )
    print(optimal_davies_pvalue(*example_args))
Exemplo n.º 2
0
    def score_2dof_assoc(self, X):
        """
        Association test.

        Computes one score statistic and approximate p-value per entry of
        ``self._rhos``, builds the quantities describing the null distribution
        of the smallest of those p-values and combines them with
        ``optimal_davies_pvalue``.

        Parameters
        ----------
        X : array
            Variant to test. It is flattened for the score helpers and used
            as-is in the matrix products below.
            NOTE(review): the products suggest a column vector -- confirm.

        Returns
        -------
        float
            P-value.
        """
        from numpy import empty, sum, trace, where
        from numpy.linalg import eigvalsh

        rhos = self._rhos

        # Per-rho score statistics and their approximate p-values.
        score_stats = self._score_stats(X.ravel(), rhos)
        lambdas_null = self._score_stats_null_dist(X.ravel())
        liu = self._score_stats_pvalue(score_stats, lambdas_null)
        qmin = self._qmin(liu)

        # 3. Calculate quantities that occur in the null distribution
        P_x = self._P(X)
        m = 0.5 * (X.T @ P_x)
        x_env = X * self._E
        P_x_env = self._P(x_env)
        env_quad = 0.5 * (x_env.T @ P_x_env)
        env_cross = x_env.T @ P_x
        env_proj = 0.25 / m * (env_cross @ env_cross.T)
        env_resid = env_quad - env_proj
        lambdas = eigvalsh(env_resid)

        var_eta = 4 * trace(env_proj @ env_resid)

        half_xPxE = 0.5 * (X.T @ P_x_env)
        tau_top = half_xPxE @ half_xPxE.T
        tau_rho = empty(len(rhos))
        for k, rho in enumerate(rhos):
            tau_rho[k] = rho * m + (1 - rho) / m * tau_top

        # Moments of the mixture defined by the eigenvalues.
        sum_sq = sum(lambdas**2)
        mu_q = sum(lambdas)
        var_q = sum_sq * 2 + var_eta
        kur_q = sum(lambdas**4) / (sum_sq**2) * 12
        dof = 12 / kur_q

        # 4. Integration
        pvals = liu[:, 0]
        T = pvals.min()
        pvalue = optimal_davies_pvalue(
            qmin, mu_q, var_q, kur_q, lambdas, var_eta, dof, tau_rho, rhos, T
        )

        # Final correction to make sure that the p-value returned is sensible:
        # fall back to the smallest per-rho p-value scaled by a small factor.
        multi = 2 if len(rhos) < 3 else 3
        positive = where(pvals > 0)[0]
        fallback = T * multi
        if pvalue <= 0 or len(positive) < len(rhos):
            pvalue = fallback
        if pvalue == 0 and len(positive) > 0:
            pvalue = pvals[positive].min()

        return pvalue
Exemplo n.º 3
0
def test_optimal_davies_pvalue_bound():
    """Check ``optimal_davies_pvalue`` against the value stored for ``bound.npz``."""
    # Archive keys, in the positional order expected by optimal_davies_pvalue.
    arg_keys = (
        "qmin",
        "MuQ",
        "VarQ",
        "KerQ",
        "eigh",
        "vareta",
        "Df",
        "tau_rho",
        "rho_list",
    )
    with data_file("bound.npz") as filepath:
        data = dict(load(filepath, allow_pickle=True))

    args = [data[key] for key in arg_keys]
    assert_allclose(optimal_davies_pvalue(*args), 0.22029543318607503)
Exemplo n.º 4
0
def test_optimal_davies_pvalue_nan():
    """Check ``optimal_davies_pvalue`` against the value stored for ``danilo_nan.npz``."""
    # Archive keys, in the positional order expected by optimal_davies_pvalue.
    arg_keys = (
        "qmin",
        "MuQ",
        "VarQ",
        "KerQ",
        "eigh",
        "vareta",
        "Df",
        "tau_rho",
        "rho_list",
    )
    with data_file("danilo_nan.npz") as filepath:
        data = dict(load(filepath))

    args = [data[key] for key in arg_keys]
    assert_allclose(optimal_davies_pvalue(*args), 0.39344574097360585)
Exemplo n.º 5
0
    def score_2dof_assoc(self, X, return_rho=False):
        """
        Association test.

        Computes one score statistic and approximate p-value per entry of
        ``self._rhos``, builds the quantities describing the null distribution
        of the smallest of those p-values and combines them with
        ``optimal_davies_pvalue``.

        Parameters
        ----------
        X : 1d-array
            Genetic variant.
        return_rho : bool (optional)
            ``True`` to also return the position of the optimal rho;
            ``False`` otherwise (Default).

        Returns
        -------
        float
            P-value.
        int
            Row index of the smallest approximate p-value (an ``argmin``, not
            the rho value itself). Returned only if ``return_rho == True``.
            NOTE(review): rows presumably follow the order of ``self._rhos``
            -- confirm against ``_score_stats_pvalue``.
        """
        from numpy import empty, sum, trace, where
        from numpy.linalg import eigvalsh

        rhos = self._rhos

        # Per-rho score statistics and their approximate p-values.
        score_stats = self._score_stats(X.ravel(), rhos)
        lambdas_null = self._score_stats_null_dist(X.ravel())
        liu = self._score_stats_pvalue(score_stats, lambdas_null)
        pvals = liu[:, 0]
        optimal_rho = pvals.argmin()
        qmin = self._qmin(liu)

        # 3. Calculate quantities that occur in the null distribution
        P_x = self._P(X)
        m = 0.5 * (X.T @ P_x)
        x_env = X * self._E
        P_x_env = self._P(x_env)
        env_quad = 0.5 * (x_env.T @ P_x_env)
        env_cross = x_env.T @ P_x
        env_proj = 0.25 / m * (env_cross @ env_cross.T)
        env_resid = env_quad - env_proj
        lambdas = eigvalsh(env_resid)

        var_eta = 4 * trace(env_proj @ env_resid)

        half_xPxE = 0.5 * (X.T @ P_x_env)
        tau_top = half_xPxE @ half_xPxE.T
        tau_rho = empty(len(rhos))
        for k, rho in enumerate(rhos):
            tau_rho[k] = rho * m + (1 - rho) / m * tau_top

        # Moments of the mixture defined by the eigenvalues.
        sum_sq = sum(lambdas**2)
        mu_q = sum(lambdas)
        var_q = sum_sq * 2 + var_eta
        kur_q = sum(lambdas**4) / (sum_sq**2) * 12
        dof = 12 / kur_q

        # 4. Integration
        T = pvals.min()
        pvalue = optimal_davies_pvalue(
            qmin, mu_q, var_q, kur_q, lambdas, var_eta, dof, tau_rho, rhos, T
        )

        # Final correction to make sure that the p-value returned is sensible:
        # fall back to the smallest per-rho p-value scaled by a small factor.
        multi = 2 if len(rhos) < 3 else 3
        positive = where(pvals > 0)[0]
        fallback = T * multi
        if pvalue <= 0 or len(positive) < len(rhos):
            pvalue = fallback
        if pvalue == 0 and len(positive) > 0:
            pvalue = pvals[positive].min()

        return (pvalue, optimal_rho) if return_rho else pvalue
Exemplo n.º 6
0
    def score_2_dof(self, X, snp_dim="col", debug=False):
        """
        Score test p-value for one variant.

        With a single entry in ``self.rho_list`` the p-value of that one
        statistic is returned directly (used for the interaction test). With
        several entries the per-rho p-values are combined through
        ``optimal_davies_pvalue`` (used for the association test).

        Parameters
        ----------
        X : (`N`, `1`) ndarray
            genotype vector (TODO: X should be small)
        snp_dim : str, optional
            Not used by this method; kept for interface compatibility.
        debug : bool, optional
            If ``True`` and ``self.rho_list`` has more than one entry, also
            return a dictionary with the intermediate quantities. It has no
            effect when ``self.rho_list`` has a single entry.

        Returns
        -------
        pvalue : float
            P value
        info : dict
            Intermediate quantities. Returned only if ``debug`` is ``True``
            and ``self.rho_list`` has more than one entry.
        """
        # The NumPy aliases in the top-level ``scipy`` namespace (scipy.zeros,
        # scipy.dot, ...) are deprecated, so NumPy is used directly.
        import numpy as np
        import scipy.linalg as la
        import scipy.stats as st

        n_rho = len(self.rho_list)

        def _quad_form_matrix(rho):
            # 0.5 * (X * L)^T P (X * L), with L = [sqrt(rho) * 1, sqrt(1 - rho) * Env]
            L = np.hstack((rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env))
            xoL = X * L
            return 0.5 * np.dot(xoL.T, P(self.gp, xoL))

        # 1. calculate Qs and pvs
        Q_rho = np.zeros(n_rho)
        Py = P(self.gp, self.y)
        for i, rho in enumerate(self.rho_list):
            LT = np.vstack((rho ** 0.5 * self.vec_ones, (1 - rho) ** 0.5 * self.Env.T))
            LTxoPy = np.dot(LT, X * Py)
            Q_rho[i] = 0.5 * np.dot(LTxoPy.T, LTxoPy)

        # Calculating pvs is split into 2 steps
        # If we only consider one value of rho i.e. equivalent to SKAT and used
        # for interaction test
        if n_rho == 1:
            LToxPxoL = _quad_form_matrix(self.rho_list[0])
            try:
                pval = davies_pvalue(Q_rho[0], LToxPxoL)
            except AssertionError:
                eighQ, _ = la.eigh(LToxPxoL)
                # mod_liu_corrected fills a 4-wide row below whose first entry
                # is the p-value; return only that entry so this path yields a
                # float like the Davies path does.
                pval = np.atleast_1d(mod_liu_corrected(Q_rho[0], eighQ))[0]
            # Script ends here for interaction test
            return pval

        # or if we consider multiple values of rho i.e. equivalent to SKAT-O
        # and used for association test
        pliumod = np.zeros((n_rho, 4))
        for i, rho in enumerate(self.rho_list):
            eighQ, _ = la.eigh(_quad_form_matrix(rho))
            pliumod[i, :] = mod_liu_corrected(Q_rho[i], eighQ)
        T = pliumod[:, 0].min()

        # 2. Calculate qmin
        # Quantile matching the smallest p-value for each rho, mapped back
        # using columns 1-3 of pliumod (location, scale, degrees of freedom).
        dof = pliumod[:, 3]
        q = st.chi2.ppf(1 - T, dof)
        qmin = (q - dof) / (2 * dof) ** 0.5 * pliumod[:, 2] + pliumod[:, 1]

        # 3. Calculate quantities that occur in null distribution
        Px1 = P(self.gp, X)
        m = 0.5 * np.dot(X.T, Px1)
        xoE = X * self.Env
        PxoE = P(self.gp, xoE)
        ETxPxE = 0.5 * np.dot(xoE.T, PxoE)
        ETxPx1 = np.dot(xoE.T, Px1)
        ETxPx11xPxE = 0.25 / m * np.dot(ETxPx1, ETxPx1.T)
        ZTIminusMZ = ETxPxE - ETxPx11xPxE
        eigh, _ = la.eigh(ZTIminusMZ)

        eta = np.dot(ETxPx11xPxE, ZTIminusMZ)
        vareta = 4 * np.trace(eta)

        OneZTZE = 0.5 * np.dot(X.T, PxoE)
        tau_top = np.dot(OneZTZE, OneZTZE.T)
        tau_rho = np.zeros(n_rho)
        for i, rho in enumerate(self.rho_list):
            tau_rho[i] = rho * m + (1 - rho) / m * tau_top

        MuQ = np.sum(eigh)
        VarQ = np.sum(eigh ** 2) * 2 + vareta
        KerQ = np.sum(eigh ** 4) / (np.sum(eigh ** 2) ** 2) * 12
        Df = 12 / KerQ

        # 4. Integration
        pvalue = optimal_davies_pvalue(
            qmin, MuQ, VarQ, KerQ, eigh, vareta, Df, tau_rho, self.rho_list, T
        )

        # Final correction to make sure that the p-value returned is sensible
        multi = 2 if n_rho < 3 else 3
        idx = np.where(pliumod[:, 0] > 0)[0]
        pval = T * multi
        if pvalue <= 0 or len(idx) < n_rho:
            pvalue = pval
        if pvalue == 0 and len(idx) > 0:
            pvalue = pliumod[:, 0][idx].min()

        if debug:
            info = {
                "Qs": Q_rho,
                "pvs_liu": pliumod,
                "qmin": qmin,
                "MuQ": MuQ,
                "VarQ": VarQ,
                "KerQ": KerQ,
                "lambd": eigh,
                "VarXi": vareta,
                "Df": Df,
                "tau": tau_rho,
            }
            return pvalue, info
        return pvalue
Exemplo n.º 7
0
    def test(self, return_rho=False):
        """Tests for allelic imbalance.

        With a single rho the p-value comes straight from ``davies_pval``.
        With several rhos the per-rho score distributions are approximated
        first. If the smallest approximate p-value is below ``4e-14`` the
        result is the smallest per-rho Davies p-value times the number of
        rhos; otherwise the p-values are combined with
        ``optimal_davies_pvalue``.

        Args:
            return_rho: If True, return optimal rho.

        Returns:
            P-value and optimal rho if return_rho is True.
        """

        def _result(pvalue, rho):
            # Shape the return value according to ``return_rho``.
            return (pvalue, rho) if return_rho else pvalue

        # score statistic for each rho and parameters of its distribution
        Q_rho = self._compute_score()
        Fs, null_lambdas = self._compute_score_dist_parameters()

        if len(self.rhos) == 1:
            # single rho: Davies method directly
            return _result(davies_pval(Q_rho[0], Fs[0]), self.rhos[0])

        # approximate the per-rho distributions using Liu's method; the
        # smallest p-value is the combined test statistic for all rhos
        approx_out = self._approximate_score_dist(Q_rho, null_lambdas)
        approx_pvalues = approx_out[:, 0]
        T = approx_pvalues.min()

        if T < 4e-14:
            # beyond Liu method's precision, use Davies + Bonferroni
            davies = np.asarray(
                [davies_pval(Q_rho[i], Fs[i]) for i in range(len(self.rhos))]
            )
            best = davies.argmin()
            return _result(davies[best] * len(self.rhos), self.rhos[best])

        optimal_rho = self.rhos[approx_pvalues.argmin()]

        # elements of the null distribution for T
        qmin = self._compute_qmin(approx_out)
        null_params = self._compute_null_parameters()

        pvalue = optimal_davies_pvalue(2 * qmin, *null_params, self.rhos, T)

        # resort to Bonferroni in case of numerical issues
        # TODO find more robust estimation
        if pvalue <= 0:
            pvalue = T * len(self.rhos)

        return _result(pvalue, optimal_rho)
Exemplo n.º 8
0
def test_optimal_davies_pvalue():
    """Check ``optimal_davies_pvalue`` against the value stored for its data file."""
    with data_file("optimal_davies_pvalue.npz") as filepath:
        # Read the stored arguments while the data file is still available:
        # an .npz archive is read lazily, so indexing it after the context
        # has exited could touch a file that is no longer there.
        args = load(filepath, allow_pickle=True)["args"]

    pval = optimal_davies_pvalue(*args)
    assert_allclose(pval, 0.9547608685218306)