def gradient(self):
    """Gradient of the log of the marginal likelihood.

    Propagates the mean and covariance parameter gradients through the
    EP approximation.  On numerical failure (e.g. a covariance that is
    not positive definite) a warning is issued and a large surrogate
    gradient is returned to push the optimiser away from the failing
    region.

    Returns
    -------
    dict
        Maps ``"ExpFamGP[0].<name>"`` (mean parameters) and
        ``"ExpFamGP[1].<name>"`` (covariance parameters) to derivatives.
    """
    from numpy_sugar import epsilon
    from numpy_sugar.linalg import economic_qs

    mean = self._mean.value()
    cov = self._cov.value()
    gmean = self._mean.gradient()
    gcov = self._cov.gradient()

    try:
        self._ep.set_prior(mean, dict(QS=economic_qs(cov)))

        grad = dict()
        # dict.items() is directly iterable in Python 3; the redundant
        # iter() wrappers were removed.
        for n, g in gmean.items():
            grad["ExpFamGP[0]." + n] = self._ep.lml_derivative_over_mean(g)

        for n, g in gcov.items():
            QS = economic_qs(g)
            grad["ExpFamGP[1]." + n] = self._ep.lml_derivative_over_cov(QS)

        return grad
    except (ValueError, LinAlgError) as e:
        warnings.warn(str(e), RuntimeWarning)
        v = self._variables.select(fixed=False)
        # Surrogate gradient: opposite sign of each parameter value,
        # with a very large magnitude.
        return {i: -sign(v.get(i).value) / epsilon.small for i in v}
def test_economic_qs_linear():
    """economic_qs_linear must agree with economic_qs(G @ G.T) for both
    wide and tall G, with and without the Q1 block."""
    random = RandomState(2951)

    # Wide case: fewer rows than columns.
    G = random.randn(3, 5)
    qs_lin = economic_qs_linear(G)
    qs_ref = economic_qs(dot(G, G.T))
    qs_noq1 = economic_qs_linear(G, return_q1=False)

    assert_allclose(qs_lin[0][0], qs_ref[0][0])
    assert_allclose(qs_noq1[0][0], qs_ref[0][0])
    assert_equal(len(qs_noq1[0]), 1)
    assert_allclose(qs_lin[0][1], qs_ref[0][1])
    assert_allclose(qs_lin[1], qs_ref[1])
    assert_allclose(qs_noq1[1], qs_ref[1])

    # Tall case: transpose so there are more rows than columns.
    G = G.T.copy()
    qs_lin = economic_qs_linear(G)
    qs_ref = economic_qs(dot(G, G.T))
    # Reorder the reference eigenpairs into descending eigenvalue order
    # before comparing.
    idx = argsort(-1 * qs_ref[1])
    qs_ref = ((qs_ref[0][0][:, idx], qs_ref[0][1]), qs_ref[1][idx])
    qs_noq1 = economic_qs_linear(G, return_q1=False)

    assert_allclose(qs_lin[0][0], qs_ref[0][0])
    assert_allclose(qs_noq1[0][0], qs_ref[0][0])
    assert_equal(len(qs_noq1[0]), 1)
    assert_allclose(qs_lin[1], qs_ref[1])
    assert_allclose(qs_noq1[1], qs_ref[1])
def test_fast_scanner_effsizes_se():
    """Effect-size standard errors for full-rank and low-rank covariances."""
    # Full-rank covariance: checked at two residual-variance values.
    K = array([[6.0, 14.0, 23.0], [14.0, 51.0, 86.0], [23.0, 86.0, 150.0]])
    QS = economic_qs(K)
    for v in (0.2, 0.0):
        _test_fast_scanner_effsizes_se(K, QS, v)

    # Low-rank covariance: one variance value only.
    K = array([[5.0, 14.0, 23.0], [14.0, 50.0, 86.0], [23.0, 86.0, 149.0]])
    QS = economic_qs(K)
    _test_fast_scanner_effsizes_se(K, QS, 0.2)
def test_glmmexpfam_optimize():
    """Fit a binomial GLMM; check lml before/after, with fixed and free params."""
    nsamples = 10

    rng = RandomState(0)
    covariates = rng.randn(nsamples, 5)
    K = linear_eye_cov().value()
    latent = rng.multivariate_normal(0.2 * ones(nsamples), K)
    QS = economic_qs(K)

    num_trials = rng.randint(1, 30, nsamples)
    num_successes = zeros(nsamples, dtype=int)
    # Per-sample Bernoulli-style draws; the RandomState stream order must
    # match the fixture values, so the loop structure is preserved.
    for idx, trials in enumerate(num_trials):
        num_successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    num_trials = ascontiguousarray(num_trials)
    glmm = GLMMExpFam(num_successes, ("binomial", num_trials), covariates, QS)

    assert_allclose(glmm.lml(), -29.102168129099287, atol=ATOL, rtol=RTOL)

    # Optimise with the fixed effects and scale pinned down.
    glmm.fix("beta")
    glmm.fix("scale")
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -27.635788105778012, atol=ATOL, rtol=RTOL)

    # Release every parameter and optimise again.
    glmm.unfix("beta")
    glmm.unfix("scale")
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -19.68486269551159, atol=ATOL, rtol=RTOL)
def estimate(y_phe, lik, kin, marker_mat=None, verbose=True):
    """Estimate variance components via a (G)LMM fit.

    Parameters
    ----------
    y_phe : array_like
        Phenotype vector.
    lik : str or tuple
        Likelihood specification; normalised via ``normalize_likelihood``.
    kin : array_like or None
        Kinship (covariance) matrix between samples; may be ``None``.
    marker_mat : array_like, optional
        Covariate/marker matrix ``M``.
    verbose : bool, optional
        Print progress information.

    Returns
    -------
    tuple
        ``(v_g, v_v, v_e)``: genetic variance, variance of the fitted
        mean, and environmental variance (with ``pi**2 / 3`` added for
        the Bernoulli likelihood).
    """
    lik = normalize_likelihood(lik)
    lik_name = lik[0]
    with session_block("Heritability analysis", disable=not verbose):
        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y_phe, M=marker_mat, K=kin)
        y_phe = data["y"]
        marker_mat = data["M"]
        kin = data["K"]
        assert_finite(y_phe, marker_mat, kin)
        if kin is not None:
            # K = K / diag(K).mean()
            q_s = economic_qs(kin)
        else:
            q_s = None
        if lik_name == "normal":
            # Gaussian outcome: restricted (REML) linear mixed model.
            method = LMM(y_phe.values, marker_mat.values, q_s, restricted=True)
            method.fit(verbose=verbose)
        else:
            # Non-Gaussian outcome: exponential-family GLMM with looser
            # convergence tolerances.
            method = GLMMExpFam(y_phe, lik, marker_mat.values, q_s, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)
        v_g = method.scale * (1 - method.delta)
        v_e = method.scale * method.delta
        if lik_name == "bernoulli":
            # Logistic latent-scale residual variance is fixed at pi^2/3.
            v_e += pi * pi / 3
        v_v = var(method.mean())
    return v_g, v_v, v_e
def _normal_lml(self):
    """Log marginal likelihood of the EP-equivalent normal model.

    Rescales the EP site parameters into an equivalent linear mixed
    model (pseudo-phenotype, mean, and covariance) and delegates the
    lml computation to ``FastLMM``.
    """
    self._update()

    m = self.m()
    ttau = self._sitelik_tau
    teta = self._sitelik_eta

    # New phenotype: the site natural parameter eta.
    y = teta.copy()

    # New mean: rescaled by the site precisions.
    m = ttau * m

    # New covariance: diag(ttau) K diag(ttau) + diag(ttau), updated
    # in place via sum2diag.
    K = self.K()
    K = ddot(ttau, ddot(K, ttau, left=False), left=True)
    sum2diag(K, ttau, out=K)
    (Q, S0) = economic_qs(K)
    Q0, Q1 = Q

    from ...lmm import FastLMM
    from numpy import newaxis

    fastlmm = FastLMM(y, Q0, Q1, S0, covariates=m[:, newaxis])
    fastlmm.learn(progress=False)
    return fastlmm.lml()
def test_lmm_scan_interface():
    """FastScanner must reject non-finite inputs and invalid variances."""
    y = array([-1.0449132, 1.15229426, 0.79595129])
    low_rank_K = array(
        [[5.0, 14.0, 23.0], [14.0, 50.0, 86.0], [23.0, 86.0, 149.0]]
    )
    QS = economic_qs(low_rank_K)
    X = ones((3, 1))

    # NaN in the outcome.
    y[0] = nan
    with pytest.raises(ValueError):
        FastScanner(y, X, QS, 0.5)

    # Infinite outcome entry.
    y[0] = inf
    with pytest.raises(ValueError):
        FastScanner(y, X, QS, 0.5)

    # NaN in the covariates.
    y[0] = 1
    X[0, 0] = nan
    with pytest.raises(ValueError):
        FastScanner(y, X, QS, 0.5)

    # Negative and NaN residual variance with clean y and X.
    y[0] = 1
    X[0, 0] = 1
    for bad_variance in (-1, nan):
        with pytest.raises(ValueError):
            FastScanner(y, X, QS, bad_variance)
def test_glmmexpfam_precise():
    """lml values over a grid of scale values, then one delta change.

    Consistency fix: the likelihood is passed as a tuple
    ``("binomial", ntri)`` to match every other test in this suite
    (the original used a list).
    """
    nsamples = 10

    random = RandomState(0)
    X = random.randn(nsamples, 5)
    K = linear_eye_cov().value()
    QS = economic_qs(K)

    ntri = random.randint(1, 30, nsamples)
    nsuc = [random.randint(0, i) for i in ntri]

    glmm = GLMMExpFam(nsuc, ("binomial", ntri), X, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])

    # Expected lml per scale value; dict preserves insertion order, so
    # the evaluation sequence matches the original test exactly.
    expected = {
        1.0: -44.74191041468836,
        2.0: -36.19907331929086,
        3.0: -33.02139830387104,
        4.0: -31.42553401678996,
        5.0: -30.507029479473243,
        6.0: -29.937569702301232,
    }
    for scale, lml in expected.items():
        glmm.scale = scale
        assert_allclose(glmm.lml(), lml, atol=ATOL, rtol=RTOL)

    glmm.delta = 0.1
    assert_allclose(glmm.lml(), -30.09977907145003, atol=ATOL, rtol=RTOL)

    assert_allclose(glmm._check_grad(), 0, atol=1e-3, rtol=RTOL)
def estimate(y, lik, K, M=None, verbose=True):
    """Estimate genetic, explained, and environmental variance components.

    Parameters
    ----------
    y : array_like
        Phenotype vector.
    lik : str or tuple
        Likelihood specification; normalised via ``normalize_likelihood``.
    K : array_like or None
        Kinship (covariance) matrix between samples; may be ``None``.
    M : array_like, optional
        Covariate matrix.
    verbose : bool, optional
        Print progress information.

    Returns
    -------
    tuple
        ``(g, v, e)``: genetic variance, variance of the fitted mean,
        and environmental variance (with ``pi**2 / 3`` added for the
        Bernoulli likelihood).
    """
    from numpy_sugar.linalg import economic_qs
    from numpy import pi, var, diag
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM
    from limix._data._assert import assert_likelihood
    from limix._data import normalize_likelihood, conform_dataset
    from limix.qtl._assert import assert_finite
    from limix._display import session_block, session_line

    lik = normalize_likelihood(lik)
    lik_name = lik[0]

    with session_block("Heritability analysis", disable=not verbose):
        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y = data["y"]
        M = data["M"]
        K = data["K"]
        assert_finite(y, M, K)

        if K is not None:
            # K = K / diag(K).mean()
            QS = economic_qs(K)
        else:
            QS = None

        if lik_name == "normal":
            # Gaussian outcome: restricted (REML) linear mixed model.
            method = LMM(y.values, M.values, QS, restricted=True)
            method.fit(verbose=verbose)
        else:
            # Non-Gaussian outcome: exponential-family GLMM.
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        g = method.scale * (1 - method.delta)
        e = method.scale * method.delta
        if lik_name == "bernoulli":
            # Logistic latent-scale residual variance is fixed at pi^2/3.
            e += pi * pi / 3
        v = var(method.mean())

    return g, v, e
def test_glmmnormal():
    """GLMMNormal lml, gradient, and fast-scanner results."""
    nsamples = 10

    rng = RandomState(0)
    X = rng.randn(nsamples, 5)
    M = rng.randn(nsamples, 3)
    K = linear_eye_cov().value()
    QS = economic_qs(K)

    eta = rng.randn(nsamples)
    tau = 10 * rng.rand(nsamples)

    glmm = GLMMNormal(eta, tau, X, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])

    assert_allclose(glmm.lml(), -19.284378946701814)
    assert_allclose(glmm._check_grad(), 0, atol=1e-3, rtol=RTOL)

    scanner = glmm.get_fast_scanner()
    res = scanner.fast_scan(M, verbose=False)

    assert_allclose(res["lml"], [9.64605678059, 9.17041834, 9.56927990771])
    assert_allclose(
        res["effsizes1"], [-0.0758297759308, 0.0509863368859, 0.0876858800519]
    )
    assert_allclose(
        res["scale"],
        [0.0053192483818597395, 0.005850105527002988, 0.00540155776161286],
    )
def test_glmmexpfam_delta_one_zero():
    """lml, gradient, and fit behaviour at the delta boundaries 0 and 1."""
    rng = RandomState(1)
    nsamples = 30

    covariates = rng.randn(nsamples, 6)
    K = dot(covariates, covariates.T)
    K /= K.diagonal().mean()
    QS = economic_qs(K)

    num_trials = rng.randint(1, 30, nsamples)
    successes = [rng.randint(0, t) for t in num_trials]

    glmm = GLMMExpFam(successes, ("binomial", num_trials), covariates, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4, -0.2])

    # Start at the delta = 0 boundary.
    glmm.delta = 0
    assert_allclose(glmm.lml(), -113.24570457063275)
    assert_allclose(glmm._check_grad(step=1e-4), 0, atol=1e-2)

    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -98.21144899310399, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm.delta, 0, atol=ATOL, rtol=RTOL)

    # Now start at the delta = 1 boundary.
    glmm.delta = 1
    assert_allclose(glmm.lml(), -98.00058169240869, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm._check_grad(step=1e-4), 0, atol=1e-1)

    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -72.82680948264196, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm.delta, 0.9999999850988439, atol=ATOL, rtol=RTOL)
def test_lmm_scan_very_low_rank():
    """FastScanner on a very low-rank covariance (4 samples, rank-2 K).

    First scan: two covariates with residual variance 0.  Second scan:
    intercept-only design with residual variance 0.75.
    """
    y = array([-1.0449132, 1.15229426, 0.79595129, 2.1])
    X = array(
        [
            [-0.40592765, 1.04348945],
            [0.92275415, -0.32394197],
            [-0.98197991, 1.22912219],
            [-1.0007991, 2.22912219],
        ]
    )
    G = array(
        [
            [-0.14505449, -1.1000817],
            [0.45714984, 1.82214436],
            [-1.23763742, 1.38771103],
            [-2.27377329, 0.9577192],
        ]
    )
    # Rank-2 covariance: four samples but only two underlying columns.
    K = G @ G.T
    low_rank_QS = economic_qs(K)
    M = array(
        [
            [0.88766985, -1.80940339],
            [0.00822629, -0.4488265],
            [0.55807272, -2.00868376],
            [3.2, 2.1],
        ]
    )
    scanner = FastScanner(y, X, low_rank_QS, 0)
    r = scanner.fast_scan(M, verbose=False)
    assert_allclose(
        r["lml"], [46.512791016862764, 46.512791016862764], atol=1e-6, rtol=1e-6
    )
    assert_allclose(
        r["effsizes0"],
        [
            [3.8616635463341358, 0.43233789455471455],
            [4.534162667593971, 3.573393734139044],
        ],
    )
    assert_allclose(
        r["effsizes1"],
        [2.1553245206596263, -0.684698367443129],
        atol=1e-6,
        rtol=1e-6,
    )
    # Scale estimates are numerically zero (~1e-16) in this scan.
    assert_allclose(
        r["scale"], [5.551115123125783e-17, 2.5326962749261384e-16], atol=1e-6
    )

    # Second scan: intercept-only covariate and v = 0.75.
    X = ones((4, 1))
    M = array(
        [
            [0.88766985, -1.80940339],
            [0.00822629, -0.4488265],
            [0.55807272, -2.00868376],
            [3.2, 2.1],
        ]
    )
    scanner = FastScanner(y, X, low_rank_QS, 0.75)
    r = scanner.fast_scan(M, verbose=False)
    assert_allclose(r["lml"], [-3.988506684733393, -2.852200552237104])
    assert_allclose(r["effsizes0"], [[-0.4955288599792398], [0.36297469139979893]])
    assert_allclose(r["effsizes1"], [0.5929013274071214, 0.36216887594630626])
    assert_allclose(r["scale"], [0.18324637118292808, 0.10382205995195082], atol=1e-6)
def _background_decomposition(G, K):
    """Economic eigendecomposition of the background covariance.

    Decomposes ``K`` directly when no genotype matrix is given;
    otherwise decomposes via ``G`` without forming ``G @ G.T``.
    The eigenvalues are rescaled in place to have unit mean.
    """
    if G is None:
        Q, S0 = economic_qs(K)
    else:
        Q, S0 = economic_qs_linear(G)

    Q0, Q1 = Q
    S0 /= S0.mean()
    return Q0, Q1, S0
def test_glmmexpfam_layout():
    """Mismatched outcome/covariate/covariance shapes must raise ValueError."""
    y = asarray([1.0, 0.5])
    X = asarray([[0.5, 1.0]])
    K = asarray([[1.0, 0.0], [0.0, 1.0]])
    QS = economic_qs(K)

    # Two outcomes against a single covariate row.
    with pytest.raises(ValueError):
        GLMMExpFam(y, "poisson", X, QS=QS)

    # One outcome against a two-sample covariance.
    y = asarray([1.0])
    with pytest.raises(ValueError):
        GLMMExpFam(y, "poisson", X, QS=QS)
def test_glmmexpfam_predict():
    """Out-of-sample predictive mean/covariance for a binomial GLMM.

    Fits on 80% of the samples and checks that the predictive mean
    correlates (> 0.8) with the held-out empirical success ratios.
    """
    random = RandomState(4)

    n = 100
    p = n + 1

    X = ones((n, 2))
    X[:, 1] = random.randn(n)

    # Standardised genotype matrix and resulting kinship.
    G = random.randn(n, p)
    G /= G.std(0)
    G -= G.mean(0)
    G /= sqrt(p)
    K = dot(G, G.T)

    # Random split: the last n // 5 shuffled samples are held out.
    i = asarray(arange(0, n), int)
    si = random.choice(i, n, replace=False)
    ntest = int(n // 5)
    itrain = si[:-ntest]
    itest = si[-ntest:]

    Xtrain = X[itrain, :]
    Ktrain = K[itrain, :][:, itrain]

    Xtest = X[itest, :]

    beta = random.randn(2)
    z = random.multivariate_normal(dot(X, beta), 0.9 * K + 0.1 * eye(n))

    ntri = random.randint(1, 100, n)
    nsuc = zeros(n, dtype=int)
    # NOTE(review): the loop index shadows the earlier index array ``i``;
    # harmless here since the array is no longer needed.
    for (i, ni) in enumerate(ntri):
        nsuc[i] += sum(z[i] + 0.2 * random.randn(ni) > 0)

    ntri = ascontiguousarray(ntri)

    QStrain = economic_qs(Ktrain)
    nsuc_train = ascontiguousarray(nsuc[itrain])
    ntri_train = ascontiguousarray(ntri[itrain])

    nsuc_test = ascontiguousarray(nsuc[itest])
    ntri_test = ascontiguousarray(ntri[itest])

    glmm = GLMMExpFam(nsuc_train, ("binomial", ntri_train), Xtrain, QStrain)
    glmm.fit(verbose=False)

    # Cross-covariance (test x train) and per-test-sample variances.
    ks = K[itest, :][:, itrain]
    kss = asarray([K[i, i] for i in itest])
    pm = glmm.predictive_mean(Xtest, ks, kss)
    pk = glmm.predictive_covariance(Xtest, ks, kss)
    r = nsuc_test / ntri_test
    assert_(corrcoef([pm, r])[0, 1] > 0.8)
    assert_allclose(pk[0], 54.263705682514846)
def _fit_glmm_simple_model(self, verbose):
    """Fit an exponential-family GLMM for the simple (single-trait) model."""
    from numpy import asarray
    from glimix_core.glmm import GLMMExpFam
    from numpy_sugar.linalg import economic_qs

    kinship = self._get_matrix_simple_model()
    outcome = asarray(self._y, float).ravel()

    # Decompose the covariance only when one was provided.
    decomp = economic_qs(kinship) if kinship is not None else None

    glmm = GLMMExpFam(outcome, self._lik, self._M, decomp)
    glmm.fit(verbose=verbose)

    self._set_simple_model_variances(glmm.v0, glmm.v1)
    self._glmm = glmm
def _fit_lmm_simple_model(self, verbose):
    """Fit a linear mixed model for the simple (single-trait) model."""
    from numpy import asarray
    from glimix_core.lmm import LMM
    from numpy_sugar.linalg import economic_qs

    kinship = self._get_matrix_simple_model()
    outcome = asarray(self._y, float).ravel()

    # Decompose the covariance only when one was provided.
    decomp = economic_qs(kinship) if kinship is not None else None

    lmm = LMM(outcome, self._M, decomp)
    lmm.fit(verbose=verbose)

    self._set_simple_model_variances(lmm.v0, lmm.v1)
    # NOTE(review): stored on ``_glmm`` even though this is an LMM —
    # downstream code appears to use one attribute for either model kind.
    self._glmm = lmm
def value(self):
    """Log of the marginal likelihood for the current parameters.

    On numerical failure (e.g. a covariance that is not positive
    definite) a warning is issued and a very large negative value is
    returned instead of raising.
    """
    from numpy_sugar import epsilon
    from numpy_sugar.linalg import economic_qs

    m = self._mean.value()
    K = self._cov.value()
    try:
        self._ep.set_prior(m, dict(QS=economic_qs(K)))
        return self._ep.lml()
    except (ValueError, LinAlgError) as err:
        warnings.warn(str(err), RuntimeWarning)
        return -1 / epsilon.small
def _fit_lmm_multi_trait(self, verbose):
    """Fit a Kron2Sum LMM for the multi-trait model."""
    from numpy import asarray, sqrt
    from glimix_core.lmm import Kron2Sum
    from numpy_sugar.linalg import ddot, economic_qs

    covariates = asarray(self._M, float)
    decomp = economic_qs(self._covariance[0]._K)
    # Matrix square root of the (low-rank part of) K: G = Q0 * sqrt(S0).
    genotype = ddot(decomp[0][0], sqrt(decomp[1]))

    lmm = Kron2Sum(
        self._y, self._mean.A, covariates, genotype, rank=1, restricted=True
    )
    lmm.fit(verbose=verbose)

    self._glmm = lmm
    self._covariance[0]._set_kron2sum(lmm)
    self._covariance[1]._set_kron2sum(lmm)
    self._mean.B = lmm.B
def test_fast_scanner_statsmodel_gls():
    """Compare FastScanner's null-model standard errors against
    statsmodels GLS on the Longley dataset with an AR(1)-style covariance."""
    import statsmodels.api as sm
    from numpy.linalg import lstsq

    def _lstsq(A, B):
        # Thin wrapper pinning rcond for stable behaviour across numpy versions.
        return lstsq(A, B, rcond=None)[0]

    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog)
    ols_resid = sm.OLS(data.endog, data.exog).fit().resid
    # Lag-1 autocorrelation estimated by regressing residuals on
    # their own previous values.
    resid_fit = sm.OLS(ols_resid[1:], sm.add_constant(ols_resid[:-1])).fit()
    rho = resid_fit.params[1]
    order = toeplitz(range(len(ols_resid)))
    sigma = rho ** order

    QS = economic_qs(sigma)
    lmm = LMM(data.endog, data.exog, QS)
    lmm.fit(verbose=False)

    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()
    # Manual GLS standard errors: sqrt(diag((X' Sigma^-1 X)^-1)).
    best_beta_se = _lstsq(data.exog.T @ _lstsq(lmm.covariance(), data.exog), eye(7))
    best_beta_se = sqrt(best_beta_se.diagonal())
    assert_allclose(scanner.null_beta_se, best_beta_se, atol=1e-5)

    # Standardise the data and compare against statsmodels GLS directly.
    endog = data.endog.copy()
    endog -= endog.mean(0)
    endog /= endog.std(0)
    exog = data.exog.copy()
    exog -= exog.mean(0)
    # The constant column has zero std; suppress the resulting warnings
    # and restore the intercept afterwards.
    with errstate(invalid="ignore", divide="ignore"):
        exog /= exog.std(0)
    exog[:, 0] = 1

    lmm = LMM(endog, exog, QS)
    lmm.fit(verbose=False)
    sigma = lmm.covariance()
    scanner = lmm.get_fast_scanner()

    gls_model = sm.GLS(endog, exog, sigma=sigma)
    gls_results = gls_model.fit()
    beta_se = gls_results.bse
    our_beta_se = sqrt(scanner.null_beta_covariance.diagonal())
    # statsmodels scales the covariance matrix we pass, that is why
    # we need to account for it here.
    assert_allclose(our_beta_se, beta_se / sqrt(gls_results.scale))
    assert_allclose(scanner.null_beta_se, beta_se / sqrt(gls_results.scale))
def get_fast_scanner(self):
    r"""Return :class:`glimix_core.lmm.FastScanner` for the current delta."""
    from numpy_sugar.linalg import ddot, economic_qs, sum2diag

    y = self.eta / self.tau

    if self._QS is None:
        # No genetic covariance: only the site-noise diagonal remains.
        K = eye(y.shape[0]) / self.tau
    else:
        Q0, S0 = self._QS[0][0], self._QS[1]
        # v0 * Q0 S0 Q0' plus the site-noise diagonal.
        K = sum2diag(dot(ddot(Q0, self.v0 * S0), Q0.T), 1 / self.tau)

    return FastScanner(y, self._X, economic_qs(K), self.v1)
def test_glmmexpfam_poisson():
    """Poisson GLMM with a pandas DataFrame as the covariate matrix.

    Fix: removed a dead ``M = stack((offset, age), axis=1)`` assignment
    that was immediately overwritten by the DataFrame construction
    (no RandomState draws involved, so results are unchanged).
    """
    from numpy import ones, stack, exp, zeros
    from numpy.random import RandomState
    from numpy_sugar.linalg import economic_qs
    from pandas import DataFrame

    random = RandomState(1)

    # sample size
    n = 30

    # covariates: an intercept-like offset and an age column.
    offset = ones(n) * random.randn()
    age = random.randint(16, 75, n)
    M = DataFrame(stack([offset, age], axis=1), columns=["offset", "age"])
    M["sample"] = [f"sample{i}" for i in range(n)]
    M = M.set_index("sample")

    # genetic variants
    G = random.randn(n, 4)

    # sampling the phenotype
    alpha = random.randn(2)
    beta = random.randn(4)
    eps = random.randn(n)
    y = M @ alpha + G @ beta + eps

    # Whole genotype of each sample.
    X = random.randn(n, 50)
    # Estimate a kinship relationship between samples.
    X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1])
    K = X_ @ X_.T + eye(n) * 0.1

    # Update the phenotype with the kinship structure.
    y += random.multivariate_normal(zeros(n), K)
    y = (y - y.mean()) / y.std()

    z = y.copy()
    y = random.poisson(exp(z))

    M = M - M.mean(0)
    QS = economic_qs(K)

    glmm = GLMMExpFam(y, "poisson", M, QS)
    assert_allclose(glmm.lml(), -52.479557279193585)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -34.09720756737648)
def test_glmmexpfam_delta1():
    """lml and gradient consistency at the boundary delta = 1."""
    nsamples = 10

    rng = RandomState(0)
    covariates = rng.randn(nsamples, 5)
    K = linear_eye_cov().value()
    QS = economic_qs(K)

    num_trials = rng.randint(1, 30, nsamples)
    successes = [rng.randint(0, t) for t in num_trials]

    glmm = GLMMExpFam(successes, ("binomial", num_trials), covariates, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])
    glmm.delta = 1

    assert_allclose(glmm.lml(), -47.09677870648636, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm._check_grad(), 0, atol=1e-4)
def test_glmmexpfam_scale_very_high():
    """lml and gradient consistency at a very large scale value."""
    nsamples = 10

    rng = RandomState(0)
    covariates = rng.randn(nsamples, 5)
    K = linear_eye_cov().value()
    QS = economic_qs(K)

    num_trials = rng.randint(1, 30, nsamples)
    successes = [rng.randint(0, t) for t in num_trials]

    glmm = GLMMExpFam(successes, ("binomial", num_trials), covariates, QS)
    glmm.beta = asarray([1.0, 0, 0.5, 0.1, 0.4])
    glmm.scale = 30.0

    assert_allclose(glmm.lml(), -29.632791380478736, atol=ATOL, rtol=RTOL)
    assert_allclose(glmm._check_grad(), 0, atol=1e-3)
def test_glmmexpfam_optimize_low_rank():
    """Optimisation with a rank-deficient covariance (K = X X^T)."""
    nsamples = 10

    rng = RandomState(0)
    covariates = rng.randn(nsamples, 5)
    K = dot(covariates, covariates.T)
    latent = dot(covariates, 0.2 * rng.randn(5))
    QS = economic_qs(K)

    num_trials = rng.randint(1, 30, nsamples)
    successes = zeros(nsamples, dtype=int)
    # Per-sample draws; the RandomState stream order must be preserved.
    for idx, trials in enumerate(num_trials):
        successes[idx] += sum(latent[idx] + 0.2 * rng.randn(trials) > 0)

    num_trials = ascontiguousarray(num_trials)
    glmm = GLMMExpFam(successes, ("binomial", num_trials), covariates, QS)

    assert_allclose(glmm.lml(), -18.60476792256323, atol=ATOL, rtol=RTOL)
    glmm.fit(verbose=False)
    assert_allclose(glmm.lml(), -7.800621320491801, atol=ATOL, rtol=RTOL)
def test_glmmexpfam_binomial_large_ntrials():
    """Numerical stability with up to ~1e5 trials per sample."""
    rng = RandomState(0)
    nsamples = 10

    covariates = rng.randn(nsamples, 2)
    G = rng.randn(nsamples, 100)
    K = dot(G, G.T)

    ntrials = rng.randint(1, 100000, nsamples)
    latent = dot(G, rng.randn(100)) / sqrt(100)

    successes = zeros(len(ntrials), int)
    # One draw per trial; the nested loop order fixes the random stream.
    for i in range(len(ntrials)):
        for _ in range(ntrials[i]):
            successes[i] += int(latent[i] + 0.1 * rng.randn() > 0)

    QS = economic_qs(K)
    glmm = GLMMExpFam(successes, ("binomial", ntrials), covariates, QS)
    glmm.fit(verbose=False)

    assert_allclose(glmm.lml(), -43.067433588125446)
def run_limix(Y, X, G, K, etaMax=0.99):
    """Per-trait LMM association scan returning Wald statistics.

    Parameters
    ----------
    Y : (N, D) array
        Phenotype matrix, one column per trait.
    X : (N, C) array
        Covariate matrix.
    G : (N, M) array
        Candidate markers to scan.
    K : (N, N) array
        Sample covariance (kinship) matrix.
    etaMax : float, optional
        Upper bound on the variance ratio ``v0 / (v0 + v1)``; above it
        the model is refit with REML (``restricted=True``).

    Returns
    -------
    (D, M) array
        Wald statistics (effect size / standard error) per trait/marker.

    Fixes: removed the unused ``N, M = G.shape`` locals and passed
    ``verbose`` by keyword for clarity.
    """
    _, D = Y.shape
    QS = economic_qs(K)
    Z = np.zeros((G.shape[1], D))
    for d in range(D):
        lmm = LMM(Y[:, d], X, QS, restricted=False)
        lmm.fit(verbose=False)
        delta = lmm.v0 / (lmm.v0 + lmm.v1)
        # Refit with REML when the ML variance ratio hits the bound.
        if delta > etaMax:
            lmm = LMM(Y[:, d], X, QS, restricted=True)
            lmm.fit(verbose=False)
        ret = lmm.get_fast_scanner().fast_scan(G, verbose=False)
        wald = ret['effsizes1'] / ret['effsizes1_se']
        Z[:, d] = wald
    return Z.T
def get_normal_likelihood_trick(self):
    """Return FastLMM's normal-likelihood trick for the EP-equivalent model.

    The EP site parameters define the pseudo-Normal model
    ``\\tilde\\mu ~ N(m, K + \\tilde\\Sigma)`` with
    ``\\tilde\\Sigma = diag(1 / ttau)`` and
    ``(K + \\tilde\\Sigma)^{-1} = A1 - A1 Q B1^{-1} Q^T A1``.

    For numerical robustness it is rescaled by
    ``\\tilde\\Sigma^{-1} = diag(ttau)`` into the equivalent model
    ``\\tilde y ~ N(\\tilde m, \\tilde K)`` where::

        \\tilde y = ttau * \\tilde\\mu        (= teta)
        \\tilde m = ttau * m
        \\tilde K = diag(ttau) K diag(ttau) + diag(ttau)
    """
    m = self.m()
    ttau = self._sitelik_tau
    teta = self._sitelik_eta

    # New phenotype: the site natural parameter eta (= ttau * mu).
    y = teta.copy()

    # New mean, rescaled by the site precisions.
    m = ttau * m

    # New covariance: diag(ttau) K diag(ttau) + diag(ttau), updated in
    # place via sum2diag.
    K = self.K()
    K = ddot(ttau, ddot(K, ttau, left=False), left=True)
    sum2diag(K, ttau, out=K)
    (Q, S0) = economic_qs(K)
    Q0, Q1 = Q

    from ...lmm import FastLMM
    from numpy import newaxis

    fastlmm = FastLMM(y, Q0, Q1, S0, covariates=m[:, newaxis])
    fastlmm.learn(progress=False)
    return fastlmm.get_normal_likelihood_trick()
def test_glmmexpfam_poisson(): random = RandomState(1) # sample size n = 30 # covariates offset = ones(n) * random.randn() age = random.randint(16, 75, n) M = stack((offset, age), axis=1) # genetic variants G = random.randn(n, 4) # sampling the phenotype alpha = random.randn(2) beta = random.randn(4) eps = random.randn(n) y = M @ alpha + G @ beta + eps # Whole genotype of each sample. X = random.randn(n, 50) # Estimate a kinship relationship between samples. X_ = (X - X.mean(0)) / X.std(0) / sqrt(X.shape[1]) K = X_ @ X_.T + eye(n) * 0.1 # Update the phenotype y += random.multivariate_normal(zeros(n), K) y = (y - y.mean()) / y.std() z = y.copy() y = random.poisson(exp(z)) M = M - M.mean(0) QS = economic_qs(K) glmm = GLMMExpFam(y, "poisson", M, QS) assert_allclose(glmm.lml(), -52.479557279193585) glmm.fit(verbose=False) assert_allclose(glmm.lml(), -34.09720756737648)
def _genetic_preprocess(X, G, K, background):
    """Normalise genetic inputs and compute the economic eigendecomposition.

    Mutates ``X``, ``G``, and ``K`` in place and records background
    metadata on ``background``.

    Returns
    -------
    tuple
        ``(Q0, Q1, S0)`` from the economic eigendecomposition of the
        background covariance.

    Raises
    ------
    ValueError
        When both ``G`` and ``K`` are ``None``.  (Was a bare
        ``Exception``; ``ValueError`` is more specific and remains
        catchable by existing ``except Exception`` handlers.)
    """
    logger = logging.getLogger(__name__)
    logger.info("Number of candidate markers to scan: %d", X.shape[1])

    if K is not None:
        background.provided_via_variants = False
        # Fixed typo in the log message ("Covariace" -> "Covariance").
        logger.info('Covariance matrix normalization.')
        gower_normalization(K, out=K)

    if G is not None:
        background.provided_via_variants = True
        background.nvariants = G.shape[1]
        background.constant_nvariants = sum(G.std(0) == 0)
        logger.info('Genetic markers normalization.')
        stdnorm(G, 0, out=G)
        G /= sqrt(G.shape[1])

    if G is None and K is None:
        raise ValueError('G and K cannot be both None.')

    logger.info('Computing the economic eigen decomposition.')
    if K is None:
        # Decompose via the variants without forming G @ G.T.
        QS = economic_qs_linear(G)
    else:
        QS = economic_qs(K)
    Q0, Q1 = QS[0]
    S0 = QS[1]

    background.background_rank = len(S0)

    logger.info('Genetic marker candidates normalization.')
    stdnorm(X, 0, out=X)
    X /= sqrt(X.shape[1])

    return (Q0, Q1, S0)
def test_lmm_kron_scan_with_lmm():
    """Kron2Sum's fast scanner must agree with a plain FastScanner built
    from the vectorised (Kronecker-expanded) problem."""
    rng = RandomState(0)
    n = 15
    Y = rng.randn(n, 3)
    A = rng.randn(3, 3)
    A = A @ A.T
    F = rng.randn(n, 2)
    G = rng.randn(n, 6)

    kron_lmm = Kron2Sum(Y, A, F, G, restricted=True)
    kron_lmm.fit(verbose=False)
    kron_scanner = kron_lmm.get_fast_scanner()

    # Equivalent flat problem: vec(Y) against kron(A, F).
    K = kron_lmm.covariance()
    X = kron(A, F)
    QS = economic_qs(K)
    plain_scanner = FastScanner(vec(Y), X, QS, 0.0)

    assert_allclose(kron_lmm.covariance(), K)
    assert_allclose(kron_scanner.null_scale, plain_scanner.null_scale)
    assert_allclose(kron_scanner.null_beta, plain_scanner.null_beta)
    assert_allclose(kron_scanner.null_lml(), plain_scanner.null_lml())
    assert_allclose(
        kron_scanner.null_beta_covariance, plain_scanner.null_beta_covariance
    )

    A1 = rng.randn(3, 2)
    F1 = rng.randn(n, 2)
    M = kron(A1, F1)

    kres = kron_scanner.scan(A1, F1)
    pres = plain_scanner.scan(M)

    assert_allclose(kres["lml"], pres["lml"])
    assert_allclose(kres["scale"], pres["scale"])
    assert_allclose(vec(kres["effsizes0"]), pres["effsizes0"])
    assert_allclose(vec(kres["effsizes1"]), pres["effsizes1"])
    assert_allclose(vec(kres["effsizes0_se"]), pres["effsizes0_se"])
    assert_allclose(vec(kres["effsizes1_se"]), pres["effsizes1_se"])
def test_economic_qs():
    """economic_qs on a 10x10 random matrix against frozen expected values.

    Q[0] spans the retained (non-null) eigenvectors, Q[1] the complement,
    and S the retained eigenvalues in ascending order.
    """
    random = RandomState(633)
    # NOTE(review): A is a plain (non-symmetric) random matrix here;
    # the expected values below were frozen from a reference run.
    A = random.randn(10, 10)
    Q, S = economic_qs(A)

    # Expected Q[0] (row by row).
    e = []
    e.append([-0.30665477, -0.06935249, -0.19790895, -0.31966245, -0.2041274])
    e.append([-0.41417631, 0.70463554, -0.029418, 0.23839354, -0.01000668])
    e.append([-0.15609931, 0.30659134, 0.12898542, 0.21192988, 0.40325725])
    e.append([0.23709357, 0.03994193, -0.12559863, -0.08280338, 0.07192297])
    e.append([0.03497126, -0.2059239, 0.13106679, -0.28509727, 0.42246837])
    e.append([0.01812449, 0.22538233, -0.8011112, -0.24884526, 0.09372236])
    e.append([0.21498028, 0.45112184, 0.49290517, -0.46891478, -0.06614824])
    e.append([-0.26556389, -0.12489177, 0.09633139, 0.183802, -0.6557185])
    e.append([0.2334407, 0.24061674, -0.0327931, -0.42773492, -0.37992895])
    e.append([-0.69358038, -0.1813236, 0.12363693, -0.45758927, 0.15649533])
    assert_allclose(Q[0], e, rtol=1e-5)

    # Expected Q[1] (row by row).
    e = []
    e.append([0.2602789, -0.16562995, 0.3652314, 0.69671087, -0.06445665])
    e.append([-0.08773482, -0.01183062, 0.17659833, -0.12014128, -0.46977809])
    e.append([-0.28447028, 0.05696058, 0.0320373, 0.37750777, 0.6555612])
    e.append([-0.52350264, -0.24840377, -0.50625997, 0.4225672, -0.37916396])
    e.append([-0.31660658, 0.56505551, 0.39638672, 0.04048807, -0.31803257])
    e.append([0.1010443, 0.37774975, -0.21705131, -0.10319173, 0.16038291])
    e.append([0.34690429, 0.24425194, -0.2942541, 0.12823675, 0.00535483])
    e.append([-0.28705797, 0.54312199, -0.19616314, 0.13677848, 0.07922182])
    e.append([-0.48731195, -0.23394529, 0.38236667, -0.24101999, 0.25046401])
    e.append([-0.13751969, -0.19006396, -0.32029686, -0.27120733, 0.07565524])
    assert_allclose(Q[1], e, rtol=1e-5)

    # Expected eigenvalues.
    e = [0.96267995, 1.51363689, 2.17446661, 2.73659799, 5.83305263]
    assert_allclose(S, e, rtol=1e-5)