def davies_pval(Q, F):
    """
    Call ``davies_pvalue(Q, F)`` and fall back to zero on ``AssertionError``.

    Parameters
    ----------
    Q, F
        Forwarded unchanged to ``davies_pvalue``.

    Returns
    -------
    The value returned by ``davies_pvalue``; ``0`` if it raised
    ``AssertionError`` (a warning line is printed to stdout in that case).
    """
    try:
        return davies_pvalue(Q, F)
    except AssertionError:
        # Only AssertionError is absorbed; any other exception propagates.
        print('Warning - Davies pvalue assertion error: zero p-value')
        return 0
def test_pval_calibration2():
    """
    Check the median ``davies_pvalue`` over 5000 draws from ``_sample``.

    Uses three large weights with one degree of freedom each and zero
    location, and expects the median p-value to be within a relative
    tolerance of 1e-2 of 0.5.
    """
    weights = [128372.0, 23720.11, 528372.0]
    dofs = [1, 1, 1]
    locs = [0.0, 0.0, 0.0]

    # Fixed seed so the drawn samples are reproducible.
    random.seed(2)
    draws = _sample(weights, dofs, locs, n=5000)

    davies = []
    for draw in draws:
        davies.append(davies_pvalue(draw, diag(weights)))

    assert_allclose(median(davies), 0.5, rtol=1e-2)
def test_pval_calibration1():
    """
    Check the median p-value of ``liu_sf`` and ``davies_pvalue`` on 5000 draws.

    ``liu_sf`` is evaluated with ``kurtosis=False`` and then ``kurtosis=True``;
    ``davies_pvalue`` is evaluated per draw. In every case the median is
    expected to be within a relative tolerance of 1e-2 of 0.5.
    """
    weights = [0.5, 0.4, 0.1]
    dofs = [1, 1, 1]
    locs = [0.0, 0.0, 0.0]

    # Fixed seed so the drawn samples are reproducible.
    random.seed(1)
    draws = _sample(weights, dofs, locs, n=5000)

    for use_kurtosis in (False, True):
        # The first element of liu_sf's result holds the p-values.
        liu = liu_sf(draws, weights, dofs, locs, kurtosis=use_kurtosis)[0]
        assert_allclose(median(liu), 0.5, rtol=1e-2)

    davies = []
    for draw in draws:
        davies.append(davies_pvalue(draw, diag(weights)))
    assert_allclose(median(davies), 0.5, rtol=1e-2)
def score_2dof_inter(self, X):
    """
    Return ``davies_pvalue`` of the first score statistic for ``X``.

    The second argument handed to ``davies_pvalue`` is a square matrix
    assembled from the flattened ``X``, ``self._E`` and ``self._P``, and
    then halved.

    Parameters
    ----------
    X : ndarray
        Flattened with ``ravel`` before use.

    Returns
    -------
    The value returned by ``davies_pvalue``.
    """
    from numpy import empty
    from numpy_sugar import ddot

    stats = self._score_stats(X.ravel(), [0])

    variant = X.ravel()
    gE = ddot(variant, self._E)
    P_gE = self._P(gE)

    lower_right = gE.T @ P_gE
    first_row = variant.T @ P_gE

    # One extra row/column on top of the columns of gE.
    size = gE.shape[1] + 1
    F = empty((size, size))
    F[1:, 1:] = lower_right
    F[0, 1:] = first_row
    F[1:, 0] = F[0, 1:]  # first column mirrors the first row
    F[0, 0] = 0
    F /= 2

    return davies_pvalue(stats[0], F)
def score_2dof_inter(self, X):
    """
    Run the interaction test for one genetic variant.

    Parameters
    ----------
    X : 1d-array
        Genetic variant; flattened with ``ravel`` before use.

    Returns
    -------
    float
        P-value, as returned by ``davies_pvalue``.
    """
    from numpy import empty
    from numpy_sugar import ddot

    # Only the first entry of the returned statistics is used below.
    q_stats = self._score_stats(X.ravel(), [0])

    g_flat = X.ravel()
    g_env = ddot(g_flat, self._E)
    projected = self._P(g_env)

    env_block = g_env.T @ projected
    cross = g_flat.T @ projected

    dim = 1 + g_env.shape[1]
    F = empty((dim, dim))
    # Fill every entry of F: corner, first row, first column, remaining block.
    F[0, 0] = 0
    F[0, 1:] = cross
    F[1:, 0] = F[0, 1:]
    F[1:, 1:] = env_block
    F /= 2

    return davies_pvalue(q_stats[0], F)
def test_davies_pvalue():
    """
    Compare ``davies_pvalue`` against the stored ``davies_pvalue.npz`` fixture.

    The fixture's ``"args"`` entry is unpacked as the positional arguments
    and the result is checked against its ``"pval"`` entry.
    """
    with data_file("davies_pvalue.npz") as filepath:
        fixture = load(filepath, allow_pickle=True)
        call_args = fixture["args"]
        computed = davies_pvalue(*call_args)
        expected = fixture["pval"]
        assert_allclose(computed, expected)
def score_2_dof(self, X, snp_dim="col", debug=False):
    """
    Score-test p-value for one variant over the rho values in ``self.rho_list``.

    With exactly one rho the p-value comes from ``davies_pvalue``, falling
    back to ``mod_liu_corrected`` when it raises ``AssertionError`` (noted in
    the original comments as equivalent to SKAT and used for the interaction
    test). With several rhos the per-rho results of ``mod_liu_corrected`` are
    combined through ``optimal_davies_pvalue`` (noted in the original comments
    as equivalent to SKAT-O and used for the association test).

    Parameters
    ----------
    X : (`N`, `1`) ndarray
        genotype vector (TODO: X should be small)
    snp_dim : str, optional
        Not read by this method.
    debug : bool, optional
        If True and more than one rho is configured, a dict of intermediate
        quantities is returned alongside the p-value. It has no effect in
        the single-rho case.

    Returns
    -------
    pvalue : float
        P value
    info : dict
        Only returned when ``debug`` is True and ``len(self.rho_list) > 1``.
    """
    # NumPy functions are taken from numpy itself: reaching them through the
    # root ``scipy`` namespace has been deprecated since SciPy 1.4.
    import numpy as np
    import scipy.linalg as la
    import scipy.stats as st

    rho_list = self.rho_list
    n_rho = len(rho_list)

    # 1. calculate Qs and pvs
    Py = P(self.gp, self.y)
    xoPy = X * Py  # independent of rho, so computed once
    Q_rho = np.zeros(n_rho)
    for i, rho in enumerate(rho_list):
        LT = np.vstack((rho ** 0.5 * self.vec_ones, (1 - rho) ** 0.5 * self.Env.T))
        LTxoPy = np.dot(LT, xoPy)
        Q_rho[i] = 0.5 * np.dot(LTxoPy.T, LTxoPy)

    # Calculating pvs is split into 2 steps
    # If we only consider one value of rho i.e. equivalent to SKAT and used
    # for interaction test
    if n_rho == 1:
        rho = rho_list[0]
        L = np.hstack((rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env))
        xoL = X * L
        PxoL = P(self.gp, xoL)
        LToxPxoL = 0.5 * np.dot(xoL.T, PxoL)
        try:
            pval = davies_pvalue(Q_rho[0], LToxPxoL)
        except AssertionError:
            eighQ, _ = la.eigh(LToxPxoL)
            # The multi-rho path below stores mod_liu_corrected's result as a
            # 4-column row and reads column 0 as the p-value. Keep only that
            # first entry here so this branch also returns a scalar p-value
            # instead of the whole result.
            # NOTE(review): confirm against mod_liu_corrected's return value.
            pval = np.atleast_1d(mod_liu_corrected(Q_rho[0], eighQ))[0]
        # The single-rho case ends here (``debug`` is not consulted).
        return pval

    # or if we consider multiple values of rho i.e. equivalent to SKAT-O and
    # used for association test
    pliumod = np.zeros((n_rho, 4))
    for i, rho in enumerate(rho_list):
        L = np.hstack((rho ** 0.5 * self.vec_ones.T, (1 - rho) ** 0.5 * self.Env))
        xoL = X * L
        PxoL = P(self.gp, xoL)
        LToxPxoL = 0.5 * np.dot(xoL.T, PxoL)
        eighQ, _ = la.eigh(LToxPxoL)
        pliumod[i, :] = mod_liu_corrected(Q_rho[i], eighQ)
    # Smallest per-rho p-value (column 0 of pliumod).
    T = pliumod[:, 0].min()

    # 2. Calculate qmin
    # For each rho, take the chi2 quantile at 1 - T with pliumod[:, 3] as the
    # degrees of freedom, standardise it, then rescale by column 2 and shift
    # by column 1 (the modified matching moments method, per the original
    # comment).
    percentile = 1 - T
    dof = pliumod[:, 3]
    q = st.chi2.ppf(percentile, dof)
    qmin = (q - dof) / (2 * dof) ** 0.5 * pliumod[:, 2] + pliumod[:, 1]

    # 3. Calculate quantities that occur in null distribution
    Px1 = P(self.gp, X)
    m = 0.5 * np.dot(X.T, Px1)
    xoE = X * self.Env
    PxoE = P(self.gp, xoE)
    ETxPxE = 0.5 * np.dot(xoE.T, PxoE)
    ETxPx1 = np.dot(xoE.T, Px1)
    ETxPx11xPxE = 0.25 / m * np.dot(ETxPx1, ETxPx1.T)
    ZTIminusMZ = ETxPxE - ETxPx11xPxE
    eigvals, _ = la.eigh(ZTIminusMZ)

    eta = np.dot(ETxPx11xPxE, ZTIminusMZ)
    vareta = 4 * np.trace(eta)

    OneZTZE = 0.5 * np.dot(X.T, PxoE)
    tau_top = np.dot(OneZTZE, OneZTZE.T)
    tau_rho = np.zeros(n_rho)
    for i, rho in enumerate(rho_list):
        tau_rho[i] = rho * m + (1 - rho) / m * tau_top

    MuQ = np.sum(eigvals)
    VarQ = np.sum(eigvals ** 2) * 2 + vareta
    KerQ = np.sum(eigvals ** 4) / (np.sum(eigvals ** 2) ** 2) * 12
    Df = 12 / KerQ

    # 4. Integration
    pvalue = optimal_davies_pvalue(
        qmin, MuQ, VarQ, KerQ, eigvals, vareta, Df, tau_rho, rho_list, T
    )

    # Final correction to make sure that the p-value returned is sensible
    multi = 2 if n_rho < 3 else 3
    idx = np.where(pliumod[:, 0] > 0)[0]
    pval = pliumod[:, 0].min() * multi
    if pvalue <= 0 or len(idx) < n_rho:
        pvalue = pval
    if pvalue == 0 and len(idx) > 0:
        # Fall back to the smallest strictly positive per-rho p-value.
        pvalue = pliumod[:, 0][idx].min()

    if debug:
        info = {
            "Qs": Q_rho,
            "pvs_liu": pliumod,
            "qmin": qmin,
            "MuQ": MuQ,
            "VarQ": VarQ,
            "KerQ": KerQ,
            "lambd": eigvals,
            "VarXi": vareta,
            "Df": Df,
            "tau": tau_rho,
        }
        return pvalue, info
    return pvalue