def _hinv(A00, A01, A11):
    """
    Invert a batch of symmetric 2-by-2 matrices, entry-wise.

    The i-th matrix is ``[[A00, A01[i]], [A01[i], A11[i]]]``. The closed-form
    inverse is tried first; entries for which it produces non-finite values
    are recomputed as a pseudo-inverse through ``hsvd``, zeroing the singular
    values that are not larger than ``rcond`` times the largest one.

    Parameters
    ----------
    A00 : scalar or array_like
        Upper-left entry; used to fill an array shaped like ``A11``.
    A01 : array_like
        Off-diagonal entries.
    A11 : array_like
        Lower-right entries.

    Returns
    -------
    ai, bi, di : ndarray
        Entries (0, 0), (0, 1) and (1, 1) of the (pseudo-)inverses.
    """
    from numpy_sugar import is_all_finite

    rcond = 1e-15
    b = atleast_1d(A01)
    d = atleast_1d(A11)
    a = full_like(d, A00)

    # Scale every matrix by its largest absolute entry; the inverse of the
    # scaled matrix is divided by the same factor afterwards.
    m = maximum(maximum(npy_abs(b), npy_abs(d)), abs(a))

    a /= m
    b = b / m
    c = b
    d = d / m

    bc = b * c
    ad = a * d
    with errstate(invalid="ignore", divide="ignore"):
        ai = a / (a * a - nan_to_num((bc * a) / d))
        bi = b / (b * b - nan_to_num(ad))
        di = d / (d * d - nan_to_num((bc * d) / a))

    ai /= m
    bi /= m
    di /= m

    ok = is_all_finite(ai) and is_all_finite(bi) and is_all_finite(di)
    if not ok:
        ok = logical_and.reduce([isfinite(ai), isfinite(bi), isfinite(di)])
        nok = logical_not(ok)

        U, S, VT = hsvd(a[nok], b[nok], d[nok])
        maxi = maximum(npy_abs(S[0]), npy_abs(S[1]))
        cutoff = rcond * maxi

        # Invert only the singular values above the cutoff; zero the rest.
        large = S[0] > cutoff
        S[0] = divide(1, S[0], where=large, out=S[0])
        S[0][~large] = 0

        large = S[1] > cutoff
        S[1] = divide(1, S[1], where=large, out=S[1])
        S[1][~large] = 0

        SiVT = [
            [VT[0][0] * S[0], VT[0][1] * S[0]],
            [VT[1][0] * S[1], VT[1][1] * S[1]],
        ]

        Ai = [
            [
                U[0][0] * SiVT[0][0] + U[0][1] * SiVT[1][0],
                U[0][0] * SiVT[0][1] + U[0][1] * SiVT[1][1],
            ],
            [
                U[1][0] * SiVT[0][0] + U[1][1] * SiVT[1][0],
                U[1][0] * SiVT[0][1] + U[1][1] * SiVT[1][1],
            ],
        ]

        # The fallback results only cover the ``nok`` entries, so the scaling
        # factor has to be restricted to those entries as well. Dividing by
        # the full ``m`` fails (or mis-broadcasts) unless every entry is bad.
        m_nok = m[nok]
        ai[nok] = Ai[0][0] / m_nok
        bi[nok] = Ai[0][1] / m_nok
        di[nok] = Ai[1][1] / m_nok

    return ai, bi, di
def __init__(self, M, Q, S, overdispersion):
    """
    Create the caches, validate the decomposition and allocate EP buffers.

    Parameters
    ----------
    M : array_like
        Covariates. ``M.shape[0]`` is used as the number of samples and the
        array is passed to ``_covariate_setup``.
    Q : array_like
        First factor of the eigen decomposition (presumably the
        eigenvectors -- confirm against the callers).
    S : array_like
        Eigenvalues of the covariance matrix. Must be finite and positive.
    overdispersion
        Stored unchanged in ``_overdispersion``.

    Raises
    ------
    ValueError
        If ``Q`` or ``S`` holds non-finite numbers, or if the smallest value
        of ``S`` is not positive.
    """
    self._cache_SQt = LRUCache(maxsize=1)
    self._cache_m = LRUCache(maxsize=1)
    self._cache_K = LRUCache(maxsize=1)
    self._cache_diagK = LRUCache(maxsize=1)
    self._cache_update = LRUCache(maxsize=1)
    self._cache_lml_components = LRUCache(maxsize=1)
    self._cache_L = LRUCache(maxsize=1)
    self._cache_A = LRUCache(maxsize=1)
    self._cache_C = LRUCache(maxsize=1)
    self._cache_BiQt = LRUCache(maxsize=1)
    self._cache_QBiQtAm = LRUCache(maxsize=1)
    self._cache_QBiQtCteta = LRUCache(maxsize=1)

    self._logger = logging.getLogger(__name__)

    # Check S itself: wrapping it in ``isfinite`` first yields a boolean
    # array, which is always finite, so the test could never fail.
    if not is_all_finite(Q) or not is_all_finite(S):
        raise ValueError("There are non-finite numbers in the provided" +
                         " eigen decomposition.")

    if S.min() <= 0:
        raise ValueError("The provided covariance matrix is not" +
                         " positive-definite because the minimum" +
                         " eigvalue is %f." % S.min())

    make_sure_reasonable_conditioning(S)

    self._S = S
    self._Q = Q
    self.__QSQt = None

    nsamples = M.shape[0]

    # Per-sample EP parameters, all starting at zero.
    self._previous_sitelik_tau = zeros(nsamples)
    self._previous_sitelik_eta = zeros(nsamples)

    self._sitelik_tau = zeros(nsamples)
    self._sitelik_eta = zeros(nsamples)

    self._cav_tau = zeros(nsamples)
    self._cav_eta = zeros(nsamples)

    self._joint_tau = zeros(nsamples)
    self._joint_eta = zeros(nsamples)

    self._v = None
    self._delta = 0
    self._overdispersion = overdispersion
    self._tM = None
    self.__tbeta = None

    self._covariate_setup(M)

    # Uninitialised work buffers; they are filled elsewhere before use.
    self._loghz = empty(nsamples)
    self._hmu = empty(nsamples)
    self._hvar = empty(nsamples)
    self._ep_params_initialized = False
def assert_finite(Y, M, K):
    """
    Raise ``ValueError`` if any of the given arrays has non-finite values.

    Parameters
    ----------
    Y : array_like
        Outcome.
    M : array_like
        Covariates.
    K : array_like or None
        Checked only when it is not ``None``.

    Raises
    ------
    ValueError
        On the first array, in the order ``Y``, ``M``, ``K``, that fails the
        finiteness check.
    """
    from numpy_sugar import is_all_finite

    required = (
        (Y, "Outcome must have finite values only."),
        (M, "Covariates must have finite values only."),
    )
    for array, message in required:
        if not is_all_finite(array):
            raise ValueError(message)

    if K is not None and not is_all_finite(K):
        raise ValueError("Covariate matrix must have finite values only.")
def __init__(self, y, Q0, Q1, S0, covariates=None):
    """
    Build the model around a ``FastLMMCore`` instance.

    The parent initialiser receives a ``Scalar(0.0)`` under the name
    ``logistic``. The phenotype is then validated, the core object is
    created from ``(y, covariates, Q0, Q1, S0)`` and ``set_nodata`` is called.

    Raises
    ------
    ValueError
        If ``y`` contains non-finite values.
    """
    super(FastLMM, self).__init__(logistic=Scalar(0.0))

    phenotype_is_finite = is_all_finite(y)
    if not phenotype_is_finite:
        raise ValueError("There are non-finite values in the phenotype.")

    core = FastLMMCore(y, covariates, Q0, Q1, S0)
    self._flmmc = core
    self.set_nodata()
def _optimal_beta_nom(self):
    """
    Return ``C * eta - A * QBiQtCteta`` with the site-likelihood ``eta``.

    Raises
    ------
    ValueError
        If the result contains non-finite values.
    """
    A = self._A()
    C = self._C()
    nominator = C * self._sitelik_eta - A * self._QBiQtCteta()

    if is_all_finite(nominator):
        return nominator
    raise ValueError("beta_nom should not be %s." % str(nominator))
def _optimal_tbeta_denom(self):
    """
    Return ``tM.T @ AM - AM.T @ Q @ solve(L, Q.T @ AM)`` with ``AM = A * tM``.

    ``L`` is the factor returned by ``_L`` and is consumed by ``cho_solve``.

    Raises
    ------
    ValueError
        If the result contains non-finite values.
    """
    chol = self._L()
    Q = self._Q
    tM = self._tM

    AM = ddot(self._A(), tM, left=True)
    solved = cho_solve(chol, dot(Q.T, AM))
    QBiQtAM = dot(Q, solved)

    denominator = dot(tM.T, AM) - dot(AM.T, QBiQtAM)
    if is_all_finite(denominator):
        return denominator
    raise ValueError("tbeta_denom should not be %s." % str(denominator))
def __init__(self, nsuccesses, ntrials):
    """
    Store the binomial phenotype as contiguous float arrays and validate it.

    Parameters
    ----------
    nsuccesses : array_like
        Number of successes.
    ntrials : array_like
        Number of trials.

    Raises
    ------
    ValueError
        If ``nsuccesses`` has a single unique value only, or if it contains
        non-finite numbers. The attributes are assigned before validation.
    """
    self.nsuccesses = ascontiguousarray(nsuccesses, dtype=float)
    self.ntrials = ascontiguousarray(ntrials, dtype=float)
    self.likelihood_name = 'Binomial'

    # The checks run on the argument as given, not on the converted copy.
    is_constant = is_all_equal(nsuccesses)
    if is_constant:
        raise ValueError("The phenotype array has a single unique value" +
                         " only.")

    all_finite = is_all_finite(nsuccesses)
    if not all_finite:
        raise ValueError("There are non-finite numbers in phenotype.")
def __init__(self, y, mean, cov):
    """
    Initialise the parent with name ``"GP"`` and store the model pieces.

    Parameters
    ----------
    y : array_like
        Phenotype; must contain finite values only.
    mean
        Mean object, registered in the parent's ``composite`` list.
    cov
        Covariance object, registered in the parent's ``composite`` list.

    Raises
    ------
    ValueError
        If ``y`` contains non-finite values.
    """
    from numpy_sugar import is_all_finite

    components = [mean, cov]
    super(GP, self).__init__("GP", composite=components)

    phenotype_ok = is_all_finite(y)
    if not phenotype_ok:
        raise ValueError("There are non-finite values in the phenotype.")

    self._y = y
    self._cov = cov
    self._mean = mean
def _gradient_over_both(self):
    """
    Return the LML gradient with respect to ``v`` and ``delta``.

    Returns
    -------
    ndarray
        Two-element array ``[dv, ddelta]``.

    Raises
    ------
    ValueError
        If either component is non-finite.
    """
    self._update()

    v = self.v
    delta = self.delta
    Q = self._Q
    S = self._S

    A = self._A()
    AQ = ddot(A, Q, left=True)
    SQt = ddot(S, Q.T, left=True)
    BiQt = self._BiQt()
    uBiQtAK0, uBiQtAK1 = self._uBiQtAK()
    C = self._C()
    mean = self.m()
    teta = self._sitelik_eta

    A_total = A.sum()

    # u = (A m - A QBiQtAm) - (C eta - A QBiQtCteta)
    Em = A * mean - A * self._QBiQtAm()
    Eu = C * teta - A * self._QBiQtCteta()
    u = Em - Eu

    uKu = dot(u, self._Kdot(u))
    tr1 = trace2(AQ, uBiQtAK0)
    tr2 = trace2(AQ, uBiQtAK1)
    tr_AQ_SQt = trace2(AQ, SQt)

    one_minus_delta = 1 - delta

    # Terms are accumulated strictly left to right.
    dv = (
        uKu / v
        - one_minus_delta * tr_AQ_SQt
        - delta * A_total
        + one_minus_delta * tr1
        + delta * tr2
    ) / 2

    dd = delta / one_minus_delta
    dd_plus_one = dd + 1

    ddelta = (
        -tr1
        - dd * tr2
        + trace2(AQ, ddot(BiQt, A, left=False)) * dd_plus_one
        + dd_plus_one * dot(u, u)
        + tr_AQ_SQt
        - A_total
    )
    ddelta = (ddelta * v - uKu / one_minus_delta) / 2

    gradient = asarray([dv, ddelta])

    if not is_all_finite(gradient):
        raise ValueError("LML gradient should not be %s." % str(gradient))

    return gradient
def _tbeta(self, value):
    """
    Store a new ``tbeta`` after clearing the caches that depend on it.

    The caches are cleared before ``value`` is validated. The first
    assignment stores a float copy; later ones overwrite it in place.

    Raises
    ------
    ValueError
        If ``value`` contains non-finite numbers.
    """
    dependent_caches = (
        self._cache_lml_components,
        self._cache_QBiQtAm,
        self._cache_m,
        self._cache_update,
    )
    for cache in dependent_caches:
        cache.clear()

    if not is_all_finite(value):
        raise ValueError("tbeta should not be %s." % str(value))

    if self.__tbeta is not None:
        self.__tbeta[:] = value
        return

    self.__tbeta = asarray(value, float).copy()
def scan(self, M):
    """
    LML, fixed-effect sizes, and scale of the candidate set.

    Parameters
    ----------
    M : array_like
        Fixed-effects set; converted to a float array.

    Returns
    -------
    lml : float
        Log of the marginal likelihood.
    effsizes0 : ndarray
        Covariates fixed-effect sizes.
    effsizes0_se : ndarray
        Covariates fixed-effect size standard errors.
    effsizes1 : ndarray
        Candidate set fixed-effect sizes.
    effsizes1_se : ndarray
        Candidate fixed-effect size standard errors.
    scale : ndarray
        Optimal scale.

    Notes
    -----
    When ``M`` has no columns, the null-model values are returned as a
    dictionary keyed by the names above. Otherwise the value comes from
    ``_multicovariate_set``.

    Raises
    ------
    ValueError
        If ``M`` has non-finite elements.
    """
    from numpy_sugar.linalg import ddot
    from numpy_sugar import is_all_finite

    M = asarray(M, float)

    no_candidates = M.shape[1] == 0
    if no_candidates:
        null_result = {}
        null_result["lml"] = self.null_lml()
        null_result["effsizes0"] = self.null_beta
        null_result["effsizes0_se"] = self.null_beta_se
        null_result["effsizes1"] = empty(0)
        null_result["effsizes1_se"] = empty(0)
        null_result["scale"] = self.null_scale
        return null_result

    if not is_all_finite(M):
        raise ValueError("M parameter has non-finite elements.")

    # Project the candidates onto every non-empty eigenvector block.
    MTQ = [dot(M.T, Qi) for Qi in self._QS[0] if Qi.size > 0]
    yTBM = [dot(yTQDi, mtq.T) for yTQDi, mtq in zip(self._yTQDi, MTQ)]
    XTBM = [dot(XTQDi, mtq.T) for XTQDi, mtq in zip(self._XTQDi, MTQ)]

    MTBM = [
        ddot(mtq, 1 / Di) @ mtq.T
        for mtq, Di in zip(MTQ, self._D)
        if Di.min() > 0
    ]

    return self._multicovariate_set(yTBM, XTBM, MTBM)
def append(self, K, name=None):
    """
    Add a given covariance matrix to the list of covariances.

    ``K`` is conformed against ``self._y``, converted to float, validated
    and divided by the mean of its diagonal before being wrapped in a
    ``GivenCov``.

    Parameters
    ----------
    K : array_like
        Covariance matrix.
    name : str, optional
        Name for the covariance. When ``None``, ``"unnamed-<i>"`` is used,
        where ``<i>`` is a counter that is then incremented.

    Raises
    ------
    ValueError
        If the conformed matrix has non-finite values.
    """
    from numpy_sugar import is_all_finite
    from numpy import asarray
    from glimix_core.cov import GivenCov

    conformed = conform_dataset(self._y, K=K)
    matrix = asarray(conformed["K"], float)

    if not is_all_finite(matrix):
        raise ValueError("Covariance-matrix values must be finite.")

    mean_diagonal = matrix.diagonal().mean()
    cov = GivenCov(matrix / mean_diagonal)

    if name is None:
        name = "unnamed-{}".format(self._unnamed)
        self._unnamed += 1

    cov.name = name
    self._covariance.append(cov)
def scan(self, M):
    """
    LML, fixed-effect sizes, and scale of the candidate set.

    Parameters
    ----------
    M : array_like
        Fixed-effects set; converted to a float array.

    Returns
    -------
    lml : float
        Log of the marginal likelihood.
    effsizes0 : ndarray
        Covariates fixed-effect sizes.
    effsizes0_se : ndarray
        Covariates fixed-effect size standard errors.
    effsizes1 : ndarray
        Candidate set fixed-effect sizes.
    effsizes1_se : ndarray
        Candidate fixed-effect size standard errors.
    scale : ndarray
        Optimal scale.

    Notes
    -----
    When ``M`` has no columns, the null-model values are returned as a
    dictionary keyed by the names above. Otherwise the value comes from
    ``_multicovariate_set``.

    Raises
    ------
    ValueError
        If ``M`` has non-finite elements.
    """
    from numpy_sugar import is_all_finite

    M = asarray(M, float)

    ncandidates = M.shape[1]
    if ncandidates == 0:
        null_result = {}
        null_result["lml"] = self._null_lml
        null_result["effsizes0"] = self.null_beta
        null_result["effsizes0_se"] = self.null_beta_se
        null_result["effsizes1"] = empty(0)
        null_result["effsizes1_se"] = empty(0)
        null_result["scale"] = self.null_scale
        return null_result

    if not is_all_finite(M):
        raise ValueError("M parameter has non-finite elements.")

    BM = self._B.dot(M)
    return self._multicovariate_set(self._y.T @ BM, self._X.T @ BM, M.T @ BM)
def scan(self, A1, X1):
    """
    LML, fixed-effect sizes, and scale of the candidate set.

    Parameters
    ----------
    A1 : (p, e) array_like
        Trait-by-environments design matrix.
    X1 : (n, m) array_like
        Variants set matrix.

    Returns
    -------
    lml : float
        Log of the marginal likelihood for the set.
    effsizes0 : (c, p) ndarray
        Fixed-effect sizes for the covariates.
    effsizes0_se : (c, p) ndarray
        Fixed-effect size standard errors for the covariates.
    effsizes1 : (m, e) ndarray
        Fixed-effect sizes for the candidates.
    effsizes1_se : (m, e) ndarray
        Fixed-effect size standard errors for the candidates.
    scale : float
        Optimal scale.

    Notes
    -----
    The values are returned in a dictionary keyed by the names above. When
    ``A1`` has no columns, the null-model quantities are returned.

    Raises
    ------
    ValueError
        If ``A1`` or ``X1`` has non-finite elements.
    """
    from numpy import empty
    from numpy.linalg import multi_dot
    from numpy_sugar import epsilon, is_all_finite
    from scipy.linalg import cho_solve

    A1 = asarray(A1, float)
    X1 = asarray(X1, float)

    if not is_all_finite(A1):
        raise ValueError("A1 parameter has non-finite elements.")

    if not is_all_finite(X1):
        raise ValueError("X1 parameter has non-finite elements.")

    if A1.shape[1] == 0:
        null_shape = (self._ncovariates, -1)
        beta_se = sqrt(self.null_beta_covariance.diagonal())
        null_result = {}
        null_result["lml"] = self.null_lml()
        null_result["effsizes0"] = unvec(self.null_beta, null_shape)
        null_result["effsizes0_se"] = unvec(beta_se, null_shape)
        null_result["effsizes1"] = empty((0, ))
        null_result["effsizes1_se"] = empty((0, ))
        null_result["scale"] = self.null_scale
        return null_result

    # Candidate-specific building blocks.
    X1X1 = X1.T @ X1
    XX1 = self._X.T @ X1
    AWA1 = self._WA.T @ A1
    A1W = A1.T @ self._W
    GX1 = self._G.T @ X1

    MRiM1 = kron(AWA1, XX1)
    M1RiM1 = kron(A1W @ A1, X1X1)

    M1Riy = vec(multi_dot([X1.T, self._Y, A1W.T]))
    XRiM1 = kron(self._WL0.T @ A1, GX1)
    ZiXRiM1 = cho_solve(self._Lz, XRiM1)

    MRiXZiXRiM1 = self._XRiM.T @ ZiXRiM1
    M1RiXZiXRiM1 = XRiM1.T @ ZiXRiM1
    M1RiXZiXRiy = XRiM1.T @ self._ZiXRiy

    # Join the covariate and candidate parts into the full system.
    MKiM = block([[self._MRiM, MRiM1], [MRiM1.T, M1RiM1]]) - block(
        [[self._MRiXZiXRiM, MRiXZiXRiM1], [MRiXZiXRiM1.T, M1RiXZiXRiM1]]
    )
    MKiy = block([self._MRiy, M1Riy]) - block([self._MRiXZiXRiy, M1RiXZiXRiy])
    beta = rsolve(MKiM, MKiy)

    mKiy = beta.T @ MKiy
    ncov_params = self._ntraits * self._ncovariates
    shape0 = (self._ncovariates, self._ntraits)
    shape1 = (X1.shape[1], A1.shape[1])

    effsizes0 = unvec(beta[:ncov_params], shape0)
    effsizes1 = unvec(beta[ncov_params:], shape1)

    nobs = self._nsamples * self._ntraits
    residual = self._yKiy - mKiy
    scale = clip(residual / nobs, epsilon.tiny, inf)
    lml = self._static_lml() / 2 - nobs * safe_log(scale) / 2 - nobs / 2

    variances = clip(scale * pinv(MKiM).diagonal(), epsilon.tiny, inf)
    effsizes_se = sqrt(variances)
    effsizes0_se = unvec(effsizes_se[:ncov_params], shape0)
    effsizes1_se = unvec(effsizes_se[ncov_params:], shape1)

    result = {}
    result["lml"] = lml
    result["effsizes0"] = effsizes0
    result["effsizes1"] = effsizes1
    result["scale"] = scale
    result["effsizes0_se"] = effsizes0_se
    result["effsizes1_se"] = effsizes1_se
    return result
def __init__(self, y, X, QS=None, restricted=False):
    """
    Constructor.

    Parameters
    ----------
    y : array_like
        Outcome.
    X : array_like
        Covariates as a two-dimensional array.
    QS : tuple
        Economic eigendecomposition in form of ``((Q0, ), S0)`` of a
        covariance matrix ``K``. When ``None``, ``economic_qs_zeros`` is used,
        ``delta`` is set to one and the ``logistic`` variable is fixed.
    restricted : bool
        ``True`` for restricted maximum likelihood optimization; ``False``
        otherwise. Defaults to ``False``.

    Raises
    ------
    ValueError
        If ``y`` or ``X`` has non-finite values, if ``y`` is empty, or if the
        sample sizes of ``y``, ``X`` and ``QS`` disagree.
    """
    from numpy_sugar import is_all_finite

    logistic = Scalar(0.0)
    logistic.listen(self._delta_update)
    logistic.bounds = (-numbers.logmax, +numbers.logmax)
    Function.__init__(self, "LMM", logistic=logistic)
    self._logistic = logistic

    y = asarray(y, float).ravel()
    if not is_all_finite(y):
        raise ValueError("There are non-finite values in the outcome.")

    nsamples = len(y)
    if nsamples == 0:
        raise ValueError("The outcome array is empty.")

    # A one-dimensional X becomes a single column.
    X = atleast_2d(asarray(X, float).T).T
    if not is_all_finite(X):
        raise ValueError("There are non-finite values in the covariates matrix.")

    self._optimal = {"beta": False, "scale": False}

    if QS is not None:
        self._B = B(QS[0][0], QS[1], 0.5, 0.5)
        self.delta = 0.5
    else:
        QS = economic_qs_zeros(len(y))
        self._B = B(QS[0][0], QS[1], 0.0, 1.0)
        self.delta = 1.0
        logistic.fix()

    if QS[0][0].shape[0] != len(y):
        raise ValueError(
            "Sample size differs between outcome and covariance decomposition."
        )

    if y.shape[0] != X.shape[0]:
        raise ValueError("Sample size differs between outcome and covariates.")

    self._y = y
    self._Q0 = QS[0][0]
    self._S0 = QS[1]
    self._Xsvd = SVD(X)
    self._tbeta = zeros(self._Xsvd.rank)
    self._scale = 1.0
    self._fix = {"beta": False, "scale": False}
    self._restricted = restricted
def __init__(self, Y, A, X, G, rank=1, restricted=False):
    """
    Constructor.

    Parameters
    ----------
    Y : (n, p) array_like
        Outcome matrix.
    A : array_like
        Trait-by-trait design matrix.
    X : (n, c) array_like
        Covariates design matrix.
    G : (n, r) array_like
        Matrix G from the GGᵀ term. A normalised copy is used internally;
        the normalisation factor is kept in ``_G_norm``.
    rank : optional, int
        Maximum rank of matrix C₀. Defaults to ``1``.
    restricted : optional, bool
        Stored in ``_restricted``. Defaults to ``False``.

    Raises
    ------
    ValueError
        If ``Y``, ``A``, ``X`` or the normalised ``G`` has non-finite values.

    Warns
    -----
    UserWarning
        If ``Y`` or ``X`` is not full column rank, or if the number of
        parameters exceeds the number of outcome entries.
    """
    from numpy_sugar import is_all_finite

    Y = asfortranarray(Y, float)
    A = asarray(A, float)
    X = asarray(X, float)

    # Validate before any linear algebra: a rank computation on NaN/inf
    # input may fail inside the SVD and hide the intended ValueError.
    if not is_all_finite(Y):
        raise ValueError(
            "There are non-finite values in the outcome matrix.")

    if not is_all_finite(A):
        msg = "There are non-finite values in the trait-by-trait design matrix."
        raise ValueError(msg)

    if not is_all_finite(X):
        raise ValueError(
            "There are non-finite values in the covariates matrix.")

    yrank = matrix_rank(Y)
    if Y.shape[1] > yrank:
        warnings.warn(
            f"Y is not full column rank: rank(Y)={yrank}. " +
            "Convergence might be problematic.",
            UserWarning,
        )

    Xrank = matrix_rank(X)
    if X.shape[1] > Xrank:
        warnings.warn(
            f"X is not full column rank: rank(X)={Xrank}. " +
            "Convergence might be problematic.",
            UserWarning,
        )

    G = asarray(G, float).copy()
    # NOTE(review): this expression always equals ``G.max()``; the magnitude
    # of negative entries is ignored. Confirm whether the largest absolute
    # value was intended.
    self._G_norm = max(G.min(), G.max())
    G /= self._G_norm

    # Checked after normalisation on purpose: this also catches a zero
    # normalisation factor, which turns entries into NaN/inf.
    if not is_all_finite(G):
        raise ValueError("There are non-finite values in the G matrix.")

    self._Y = Y
    self._cov = Kron2SumCov(G, Y.shape[1], rank)
    self._cov.listen(self._parameters_update)
    self._mean = KronMean(A, X)
    self._cache = {"terms": None}
    self._restricted = restricted
    composite = [("C0", self._cov.C0), ("C1", self._cov.C1)]
    Function.__init__(self, "Kron2Sum", composite=composite)

    nparams = self._mean.nparams + self._cov.nparams
    if nparams > Y.size:
        msg = "The number of parameters is larger than the outcome size."
        msg += " Convergence is expected to be problematic."
        warnings.warn(msg, UserWarning)
def estimate(y, lik, K, M=None, verbose=True):
    r"""Estimate the so-called narrow-sense heritability.

    It supports Normal, Bernoulli, Probit, Binomial, and Poisson phenotypes.
    Let :math:`N` be the sample size and :math:`S` the number of covariates.

    Parameters
    ----------
    y : array_like
        Either a tuple of two arrays of `N` individuals each (Binomial
        phenotypes) or an array of `N` individuals (Normal, Poisson, or
        Bernoulli phenotypes). If a continuous phenotype is provided (i.e., a
        Normal one), make sure they have been normalised in such a way that
        its values are not extremely large; it might cause numerical errors
        otherwise. For example, by using :func:`limix.qc.mean_standardize` or
        :func:`limix.qc.quantile_gaussianize`.
    lik : "normal", "bernoulli", "probit", "binomial", "poisson"
        Sample likelihood describing the residual distribution.
    K : array_like
        :math:`N`-by-:math:`N` covariance matrix. It might be, for example,
        the estimated kinship relationship between the individuals. The
        provided matrix will be normalised via the function
        :func:`limix.qc.normalise_covariance`.
    M : array_like, optional
        :math:`N` individuals by :math:`S` covariates.
        It will create a :math:`N`-by-:math:`1` matrix ``M`` of ones
        representing the offset covariate if ``None`` is passed. If an array
        is passed, it will be used as is. Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    float
        Estimated heritability.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt
        >>> from numpy.random import RandomState
        >>> from limix.her import estimate
        >>>
        >>> random = RandomState(0)
        >>>
        >>> G = random.randn(150, 200) / sqrt(200)
        >>> K = dot(G, G.T)
        >>> z = dot(G, random.randn(200)) + random.randn(150)
        >>> y = random.poisson(exp(z))
        >>>
        >>> print('%.3f' % estimate(y, 'poisson', K, verbose=False))  # doctest: +FLOAT_CMP
        0.183

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed.
    Please, refer to the :func:`limix.qc.mean_impute` function for missing
    value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs
    from numpy import ones, pi, var
    from glimix_core.glmm import GLMMExpFam
    from glimix_core.lmm import LMM

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    check_likelihood_name(lik_name)
    is_normal = lik_name == "normal"

    with session_block("heritability analysis", disable=not verbose):

        if M is None:
            M = ones((len(y), 1))

        with session_line("Normalising input...", disable=not verbose):
            data = conform_dataset(y, M=M, K=K)

        y, M, K = data["y"], data["M"], data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                raise ValueError("Covariate matrix must have finite values only.")
            K = normalise_covariance(K)

        y = normalise_extreme_values(y, lik)

        QS = economic_qs(K) if K is not None else None

        if is_normal:
            method = LMM(y.values, M.values, QS)
            method.fit(verbose=verbose)
        else:
            method = GLMMExpFam(y, lik, M.values, QS, n_int=500)
            method.fit(verbose=verbose, factr=1e6, pgtol=1e-3)

        # Split the fitted scale into the K-driven and the residual part.
        genetic_var = method.scale * (1 - method.delta)
        noise_var = method.scale * method.delta
        if lik_name == "bernoulli":
            noise_var += pi * pi / 3

        if is_normal:
            fixed_var = method.fixed_effects_variance
        else:
            fixed_var = var(method.mean())

        return genetic_var / (fixed_var + genetic_var + noise_var)
def test_is_all_finite():
    """Check ``is_all_finite`` on a finite list and on lists with NaN/inf."""
    cases = (
        ([1, -1, 2393.0], True),
        ([1, -1, nan, 2393.0], False),
        ([1, -1, inf, 2393.0], False),
    )
    for values, expected in cases:
        assert_equal(is_all_finite(values), expected)
def st_scan(G, y, lik, K=None, M=None, verbose=True):
    r""" Single-variant association testing via generalised linear mixed models.

    It supports Normal (linear mixed model), Bernoulli, Probit, Binomial, and
    Poisson residual errors, defined by ``lik``.
    The columns of ``G`` define the candidates to be tested for association
    with the phenotype ``y``.
    The covariance matrix is set by ``K``.
    If not provided, or set to ``None``, the generalised linear model
    without random effects is assumed.
    The covariates can be set via the parameter ``M``.
    We recommend to always provide a column of ones when covariates are
    actually provided.

    Parameters
    ----------
    G : array_like
        :math:`N` individuals by :math:`S` candidate markers.
    y : array_like
        An outcome array of :math:`N` individuals.
    lik : tuple, "normal", "bernoulli", "probit", "binomial", "poisson"
        Sample likelihood describing the residual distribution.
        Either a tuple (or list) or a string specifying the likelihood is
        required. The Normal, Bernoulli, Probit, and Poisson likelihoods can
        be selected by providing a string. Binomial likelihood on the other
        hand requires a tuple because of the number of trials:
        ``("binomial", array_like)``.
    K : array_like, optional
        :math:`N`-by-:math:`N` covariance matrix (e.g., kinship coefficients).
        Set to ``None`` for a generalised linear model without random effects.
        Defaults to ``None``.
    M : array_like, optional
        `N` individuals by `S` covariates.
        It will create a :math:`N`-by-:math:`1` matrix ``M`` of ones
        representing the offset covariate if ``None`` is passed. If an array
        is passed, it will be used as is. Defaults to ``None``.
    verbose : bool, optional
        ``True`` to display progress and summary; ``False`` otherwise.

    Returns
    -------
    :class:`limix.qtl.QTLModel`
        QTL representation.

    Examples
    --------
    .. doctest::

        >>> from numpy import dot, exp, sqrt, ones
        >>> from numpy.random import RandomState
        >>> from pandas import DataFrame
        >>> import pandas as pd
        >>> from limix.qtl import st_scan
        >>>
        >>> random = RandomState(1)
        >>> pd.options.display.float_format = "{:9.6f}".format
        >>>
        >>> n = 30
        >>> p = 3
        >>> samples_index = range(n)
        >>>
        >>> M = DataFrame(dict(offset=ones(n), age=random.randint(10, 60, n)))
        >>> M.index = samples_index
        >>>
        >>> X = random.randn(n, 100)
        >>> K = dot(X, X.T)
        >>>
        >>> candidates = random.randn(n, p)
        >>> candidates = DataFrame(candidates, index=samples_index,
        ...                                    columns=['rs0', 'rs1', 'rs2'])
        >>>
        >>> y = random.poisson(exp(random.randn(n)))
        >>>
        >>> model = st_scan(candidates, y, 'poisson', K, M=M, verbose=False)
        >>>
        >>> model.variant_pvalues.to_dataframe()  # doctest: +FLOAT_CMP
                          pv
        candidate
        rs0         0.554444
        rs1         0.218996
        rs2         0.552200
        >>> model.variant_effsizes.to_dataframe()  # doctest: +FLOAT_CMP
                    effsizes
        candidate
        rs0        -0.130867
        rs1        -0.315078
        rs2        -0.143869
        >>> model.variant_effsizes_se.to_dataframe()  # doctest: +FLOAT_CMP
                   effsizes std
        candidate
        rs0            0.221390
        rs1            0.256327
        rs2            0.242013
        >>> model  # doctest: +FLOAT_CMP
        Variants
        --------
                effsizes  effsizes_se   pvalues
        count          3            3         3
        mean   -0.196604     0.239910  0.441880
        std     0.102807     0.017563  0.193027
        min    -0.315077     0.221389  0.218996
        25%    -0.229473     0.231701  0.385598
        50%    -0.143869     0.242013  0.552200
        75%    -0.137367     0.249170  0.553322
        max    -0.130866     0.256326  0.554443
        <BLANKLINE>
        Covariate effect sizes for H0
        -----------------------------
              age    offset
        -0.005568  0.395287

        >>> from numpy import zeros
        >>>
        >>> nsamples = 50
        >>>
        >>> X = random.randn(nsamples, 2)
        >>> G = random.randn(nsamples, 100)
        >>> K = dot(G, G.T)
        >>> ntrials = random.randint(1, 100, nsamples)
        >>> z = dot(G, random.randn(100)) / sqrt(100)
        >>>
        >>> successes = zeros(len(ntrials), int)
        >>> for i, nt in enumerate(ntrials):
        ...     for _ in range(nt):
        ...         successes[i] += int(z[i] + 0.5 * random.randn() > 0)
        >>>
        >>> result = st_scan(X, successes, ("binomial", ntrials), K, verbose=False)
        >>> print(result)  # doctest: +FLOAT_CMP
        Variants
        --------
                effsizes  effsizes_se   pvalues
        count          2            2         2
        mean    0.227116     0.509575  0.478677
        std     0.567975     0.031268  0.341791
        min    -0.174503     0.487466  0.236994
        25%     0.026307     0.498520  0.357835
        50%     0.227116     0.509575  0.478677
        75%     0.427925     0.520630  0.599518
        max     0.628735     0.531685  0.720359
        <BLANKLINE>
        Covariate effect sizes for H0
        -----------------------------
           offset
         0.409570

    Notes
    -----
    It will raise a ``ValueError`` exception if non-finite values are passed.
    Please, refer to the :func:`limix.qc.mean_impute` function for missing
    value imputation.
    """
    from numpy_sugar import is_all_finite
    from numpy_sugar.linalg import economic_qs

    if not isinstance(lik, (tuple, list)):
        lik = (lik,)

    lik_name = lik[0].lower()
    # ``lik`` may be a list; convert the tail so that the concatenation does
    # not raise ``TypeError`` (tuple + list is not allowed).
    lik = (lik_name,) + tuple(lik[1:])
    check_likelihood_name(lik_name)

    with session_block("qtl analysis", disable=not verbose):

        with session_line("Normalising input... ", disable=not verbose):
            data = conform_dataset(y, M, G=G, K=K)

        y = data["y"]
        M = data["M"]
        G = data["G"]
        K = data["K"]

        if not is_all_finite(y):
            raise ValueError("Outcome must have finite values only.")

        if not is_all_finite(M):
            raise ValueError("Covariates must have finite values only.")

        if K is not None:
            if not is_all_finite(K):
                raise ValueError("Covariate matrix must have finite values only.")
            QS = economic_qs(K)
        else:
            QS = None

        y = normalise_extreme_values(data["y"], lik)

        if lik_name == "normal":
            model = _perform_lmm(y.values, M, QS, G, verbose)
        else:
            model = _perform_glmm(y.values, lik, M, K, QS, G, verbose)

        if verbose:
            print(model)

        return model
def beta(self, value):
    """
    Set the effect sizes by storing their transformed counterpart.

    ``_tbeta`` is set to ``_svd_S12 * (_svd_V.T @ value)``.

    Raises
    ------
    ValueError
        If ``value`` contains non-finite numbers.
    """
    value_ok = is_all_finite(value)
    if not value_ok:
        raise ValueError("beta should not be %s." % str(value))

    scaling = self._svd_S12
    rotated = dot(self._svd_V.T, value)
    self._tbeta = scaling * rotated
def scan(phenotype, X, G=None, K=None, covariates=None, progress=True,
         options=None):
    """Association between genetic variants and phenotype.

    Matrix `X` shall contain the genetic markers (e.g., number of minor
    alleles) with rows and columns representing samples and genetic markers,
    respectively.

    The user must specify only one of the parameters `G` and `K` for defining
    the genetic background.

    Let :math:`N` be the sample size, :math:`S` the number of covariates,
    :math:`P_c` the number of genetic markers to be tested, and :math:`P_b`
    the number of genetic markers used for Kinship estimation.

    Args:
        phenotype: Phenotype object. Its ``likelihood_name`` is logged and
                   its ``sample_size`` gives :math:`N`.
        X (array_like): Candidate genetic markers (or any other
                        type of explanatory variable) whose
                        association with the phenotype will be
                        tested. Dimension (:math:`N\\times P_c`).
        G (array_like): Genetic markers matrix used internally for
                        kinship estimation. Dimension
                        (:math:`N\\times P_b`).
        K (array_like): Kinship matrix. Dimension
                        (:math:`N\\times N`).
        covariates (array_like): Covariates. Default is an offset.
                                 Dimension (:math:`N\\times S`).
        progress (bool): Shows progress. Defaults to `True`.
        options (dict): Scan options. The keys ``'fast'`` and
                        ``'rank_norm'`` default to ``True`` when absent.
                        The dictionary passed in is not modified.

    Returns:
        The ``QTLScan`` instance, after its statistics have been computed.

    Raises:
        ValueError: If `X`, `G` or `K` has non-finite values.
    """
    logger = logging.getLogger(__name__)
    logger.info('%s association scan has started.', phenotype.likelihood_name)

    # Work on a copy so the defaults do not leak into the caller's dict.
    options = dict() if options is None else dict(options)
    options.setdefault('fast', True)
    options.setdefault('rank_norm', True)

    n = phenotype.sample_size
    covariates = ones((n, 1)) if covariates is None else covariates

    X = _clone(X)
    G = _clone(G)
    K = _clone(K)

    if not is_all_finite(X):
        raise ValueError("The candidate matrix X has non-finite values.")

    if G is not None and not is_all_finite(G):
        raise ValueError("The genetic markers matrix G has non-finite values.")

    if K is not None and not is_all_finite(K):
        raise ValueError("The Kinship matrix K has non-finite values.")

    background = Background()
    (Q0, Q1, S0) = _genetic_preprocess(X, G, K, background)

    qtl = QTLScan(phenotype, covariates, X, Q0, Q1, S0, options)
    qtl.progress = progress
    qtl.compute_statistics()

    return qtl