def LL(self, h, X=None, stack=True, REML=False):
    """
    Computes the log-likelihood for a given heritability (h).

    If X is None, the default X0t is used. If X is given and stack=True,
    X is left-multiplied by Kve.T and its first column is written into
    column self.q of X0t_stack, which is then used as the covariate
    matrix. If stack is False, X is used as-is in place of X0t.

    REML is computed by adding additional terms to the standard LL and
    can be requested by setting REML=True.

    Returns the tuple (LL, beta, sigma, XX_i) where beta, sigma and XX_i
    are taken unchanged from self.getMLSoln(h, X).
    """
    # Identity test on purpose: `X == None` on an ndarray compares
    # element-wise and the resulting array cannot be used as a condition.
    if X is None:
        X = self.X0t
    elif stack:
        # Overwrites the shared X0t_stack buffer in place; only column 0
        # of the transformed X is used.
        self.X0t_stack[:, self.q] = matrixMult(self.Kve.T, X)[:, 0]
        X = self.X0t_stack

    n = float(self.N)
    q = float(X.shape[1])
    beta, sigma, Q, XX_i, XX = self.getMLSoln(h, X)

    LL = (n * np.log(2 * np.pi)
          + np.log(h * self.Kva + (1.0 - h)).sum()
          + n
          + n * np.log(1.0 / n * Q))
    LL = -0.5 * LL

    if REML:
        LL_REML_part = (q * np.log(2.0 * np.pi * sigma)
                        + np.log(det(matrixMult(X.T, X)))
                        - np.log(det(XX)))
        LL = LL + 0.5 * LL_REML_part

    # Q (and sigma) come back as arrays from getMLSoln; collapse to a scalar.
    LL = LL.sum()
    return LL, beta, sigma, XX_i
def calculateKinship(W, center=False):
    """
    W is an n x m matrix encoding SNP minor alleles.

    This function takes a matrix of SNPs, imputes missing (NaN) values
    with the mean of the non-missing entries of the same column,
    normalizes each column to zero mean and unit variance, drops columns
    with zero variance, and returns the RRM matrix W * W.T / m.

    Note that imputation and normalization are written into the W that
    was passed in, and that m is the column count before zero-variance
    columns are dropped.

    If center is True, the matrix is rescaled by (n - 1) / trace(P K P)
    with P = I - ones((n, n)) / n before being returned.
    """
    n = W.shape[0]
    m = W.shape[1]
    keep = []
    for i in range(m):
        missing = np.isnan(W[:, i])
        # `~` negates the boolean mask; NumPy rejects `True - mask`
        # (boolean subtraction) with a TypeError.
        mn = W[~missing, i].mean()
        W[missing, i] = mn
        vr = W[:, i].var()
        if vr == 0:
            # Constant column: carries no information and cannot be scaled.
            continue
        keep.append(i)
        W[:, i] = (W[:, i] - mn) / np.sqrt(vr)

    W = W[:, keep]
    K = matrixMult(W, W.T) * 1.0 / float(m)
    if center:
        P = np.diag(np.repeat(1, n)) - 1 / float(n) * np.ones((n, n))
        S = np.trace(matrixMult(matrixMult(P, K), P))
        K_n = (n - 1) * K / S
        return K_n
    return K
def calculateKinship(W, center=False):
    """
    W is an n x m matrix encoding SNP minor alleles.

    This function takes a matrix of SNPs, imputes missing (NaN) values
    with the mean of the non-missing entries of the same column,
    normalizes each column to zero mean and unit variance, drops columns
    with zero variance, and returns the RRM matrix W * W.T / m.

    Note that imputation and normalization are written into the W that
    was passed in, and that m is the column count before zero-variance
    columns are dropped.

    If center is True, the matrix is rescaled by (n - 1) / trace(P K P)
    with P = I - ones((n, n)) / n before being returned.
    """
    n = W.shape[0]
    m = W.shape[1]
    keep = []
    for i in range(m):
        missing = np.isnan(W[:, i])
        # `~` negates the boolean mask; NumPy rejects `True - mask`
        # (boolean subtraction) with a TypeError.
        mn = W[~missing, i].mean()
        W[missing, i] = mn
        vr = W[:, i].var()
        if vr == 0:
            # Constant column: carries no information and cannot be scaled.
            continue
        keep.append(i)
        W[:, i] = (W[:, i] - mn) / np.sqrt(vr)

    W = W[:, keep]
    K = matrixMult(W, W.T) * 1.0 / float(m)
    if center:
        P = np.diag(np.repeat(1, n)) - 1 / float(n) * np.ones((n, n))
        S = np.trace(matrixMult(matrixMult(P, K), P))
        K_n = (n - 1) * K / S
        return K_n
    return K
def LL(self, h, X=None, stack=True, REML=False):
    """
    Computes the log-likelihood for a given heritability (h).

    If X is None, the default X0t is used. If X is given and stack=True,
    X is left-multiplied by Kve.T and its first column is written into
    column self.q of X0t_stack, which is then used as the covariate
    matrix. If stack is False, X is used as-is in place of X0t.

    REML is computed by adding additional terms to the standard LL and
    can be requested by setting REML=True.

    Returns the tuple (LL, beta, sigma, XX_i) where beta, sigma and XX_i
    are taken unchanged from self.getMLSoln(h, X).
    """
    # Identity test on purpose: `X == None` on an ndarray compares
    # element-wise and the resulting array cannot be used as a condition.
    if X is None:
        X = self.X0t
    elif stack:
        # Overwrites the shared X0t_stack buffer in place; only column 0
        # of the transformed X is used.
        self.X0t_stack[:, self.q] = matrixMult(self.Kve.T, X)[:, 0]
        X = self.X0t_stack

    n = float(self.N)
    q = float(X.shape[1])
    beta, sigma, Q, XX_i, XX = self.getMLSoln(h, X)

    LL = (n * np.log(2 * np.pi)
          + np.log(h * self.Kva + (1.0 - h)).sum()
          + n
          + n * np.log(1.0 / n * Q))
    LL = -0.5 * LL

    if REML:
        LL_REML_part = (q * np.log(2.0 * np.pi * sigma)
                        + np.log(det(matrixMult(X.T, X)))
                        - np.log(det(XX)))
        LL = LL + 0.5 * LL_REML_part

    # Q (and sigma) come back as arrays from getMLSoln; collapse to a scalar.
    LL = LL.sum()
    return LL, beta, sigma, XX_i
def transform(self):
    """
    Rotates the phenotype vector and the covariate matrix.

    Both Y and X0 are left-multiplied by the transpose of Kve, the
    eigenvector matrix of K (the kinship); the results are stored as Yt
    and X0t. X0t_stack is X0t with one extra column of ones appended on
    the right, and q records the number of columns of X0t (which is also
    the index of that extra column).
    """
    rotation = self.Kve.T
    rotated_covariates = matrixMult(rotation, self.X0)

    self.Yt = matrixMult(rotation, self.Y)
    self.X0t = rotated_covariates
    # Extra trailing column is a placeholder slot of ones.
    spare_column = np.ones((self.N, 1))
    self.X0t_stack = np.hstack([rotated_covariates, spare_column])
    self.q = rotated_covariates.shape[1]
def transform(self):
    """
    Rotates the phenotype vector and the covariate matrix.

    Both Y and X0 are left-multiplied by the transpose of Kve, the
    eigenvector matrix of K (the kinship); the results are stored as Yt
    and X0t. X0t_stack is X0t with one extra column of ones appended on
    the right, and q records the number of columns of X0t (which is also
    the index of that extra column).
    """
    rotation = self.Kve.T
    rotated_covariates = matrixMult(rotation, self.X0)

    self.Yt = matrixMult(rotation, self.Y)
    self.X0t = rotated_covariates
    # Extra trailing column is a placeholder slot of ones.
    spare_column = np.ones((self.N, 1))
    self.X0t_stack = np.hstack([rotated_covariates, spare_column])
    self.q = rotated_covariates.shape[1]
def fit(self, X=None, ngrids=100, REML=True):
    """
    Finds the maximum-likelihood solution for the heritability (h) given
    the current parameters.

    If X is None, X0t is used as the covariate matrix. Otherwise X is
    left-multiplied by Kve.T and its first column is written into column
    self.q of X0t_stack, which is then used as the covariate matrix.

    The LL is evaluated on the grid h = 0, 1/ngrids, ..., (ngrids-1)/ngrids
    and .getMax(...) is then used to find the optimum. Given this optimum,
    the LL and the associated ML solutions are computed once more.

    Stores LLs, H, optH, optLL, optBeta and optSigma on self and returns
    the tuple (hmax, beta, sigma, L).
    """
    # Identity test on purpose: `X == None` on an ndarray compares
    # element-wise and the resulting array cannot be used as a condition.
    if X is None:
        X = self.X0t
    else:
        # Overwrites the shared X0t_stack buffer in place.
        self.X0t_stack[:, self.q] = matrixMult(self.Kve.T, X)[:, 0]
        X = self.X0t_stack

    H = np.arange(ngrids) / float(ngrids)
    L = np.array([self.LL(h, X, stack=False, REML=REML)[0] for h in H])
    self.LLs = L

    hmax = self.getMax(H, X, REML)
    # The fourth value returned by LL is not needed here.
    L, beta, sigma, _ = self.LL(hmax, X, stack=False, REML=REML)

    self.H = H
    self.optH = hmax.sum()
    self.optLL = L
    self.optBeta = beta
    self.optSigma = sigma.sum()

    return hmax, beta, sigma, L
def fit(self, X=None, ngrids=100, REML=True):
    """
    Finds the maximum-likelihood solution for the heritability (h) given
    the current parameters.

    If X is None, X0t is used as the covariate matrix. Otherwise X is
    left-multiplied by Kve.T and its first column is written into column
    self.q of X0t_stack, which is then used as the covariate matrix.

    The LL is evaluated on the grid h = 0, 1/ngrids, ..., (ngrids-1)/ngrids
    and .getMax(...) is then used to find the optimum. Given this optimum,
    the LL and the associated ML solutions are computed once more.

    Stores LLs, H, optH, optLL, optBeta and optSigma on self and returns
    the tuple (hmax, beta, sigma, L).
    """
    # Identity test on purpose: `X == None` on an ndarray compares
    # element-wise and the resulting array cannot be used as a condition.
    if X is None:
        X = self.X0t
    else:
        # Overwrites the shared X0t_stack buffer in place.
        self.X0t_stack[:, self.q] = matrixMult(self.Kve.T, X)[:, 0]
        X = self.X0t_stack

    H = np.arange(ngrids) / float(ngrids)
    L = np.array([self.LL(h, X, stack=False, REML=REML)[0] for h in H])
    self.LLs = L

    hmax = self.getMax(H, X, REML)
    # The fourth value returned by LL is not needed here.
    L, beta, sigma, _ = self.LL(hmax, X, stack=False, REML=REML)

    self.H = H
    self.optH = hmax.sum()
    self.optLL = L
    self.optBeta = beta
    self.optSigma = sigma.sum()

    return hmax, beta, sigma, L
def getMLSoln(self, h, X):
    """
    Obtains the maximum-likelihood estimates for the covariate
    coefficients (beta) and the total variance of the trait (sigma), and
    also passes back intermediates that other functions can reuse.

    h is a value between 0 and 1 representing the heritability, i.e. the
    proportion of the total variance attributed to genetics. X is the
    covariate matrix.

    Returns (beta, sigma, Q, XX_i, XX): XX is X.T * S * X with weights
    S = 1 / (h * Kva + (1 - h)), XX_i is its inverse, Q is the S-weighted
    sum of squared residuals of Yt on X, and sigma is Q divided by
    (N - number of columns of X).
    """
    # Per-element weights derived from Kva and h.
    weights = 1.0 / (h * self.Kva + (1.0 - h))
    weighted_Xt = X.T * weights

    XX = matrixMult(weighted_Xt, X)
    XX_i = inv(XX)
    beta = matrixMult(matrixMult(XX_i, weighted_Xt), self.Yt)

    residual = self.Yt - matrixMult(X, beta)
    Q = np.dot(residual.T * weights, residual)

    dof = float(self.N) - float(X.shape[1])
    sigma = Q * 1.0 / dof
    return beta, sigma, Q, XX_i, XX
def getMLSoln(self, h, X):
    """
    Obtains the maximum-likelihood estimates for the covariate
    coefficients (beta) and the total variance of the trait (sigma), and
    also passes back intermediates that other functions can reuse.

    h is a value between 0 and 1 representing the heritability, i.e. the
    proportion of the total variance attributed to genetics. X is the
    covariate matrix.

    Returns (beta, sigma, Q, XX_i, XX): XX is X.T * S * X with weights
    S = 1 / (h * Kva + (1 - h)), XX_i is its inverse, Q is the S-weighted
    sum of squared residuals of Yt on X, and sigma is Q divided by
    (N - number of columns of X).
    """
    # Per-element weights derived from Kva and h.
    weights = 1.0 / (h * self.Kva + (1.0 - h))
    weighted_Xt = X.T * weights

    XX = matrixMult(weighted_Xt, X)
    XX_i = inv(XX)
    beta = matrixMult(matrixMult(XX_i, weighted_Xt), self.Yt)

    residual = self.Yt - matrixMult(X, beta)
    Q = np.dot(residual.T * weights, residual)

    dof = float(self.N) - float(X.shape[1])
    sigma = Q * 1.0 / dof
    return beta, sigma, Q, XX_i, XX
def association(self, X, h=None, stack=True, REML=True, returnBeta=False):
    """
    Calculates association statistics for the SNPs encoded in the vector
    X of size n.

    If stack is True, X is left-multiplied by Kve.T and its first column
    is written into column self.q of X0t_stack, which is then used as the
    covariate matrix; otherwise X is used as given. If h is None, the
    optimal h stored in optH is used.

    The statistic is computed by self.tstat for the last coefficient
    returned by self.LL. Returns (ts, ps), or, when returnBeta is True,
    (ts, ps, last coefficient, its variance entry multiplied by sigma).
    """
    if stack:
        # Overwrites the shared X0t_stack buffer in place.
        self.X0t_stack[:, self.q] = matrixMult(self.Kve.T, X)[:, 0]
        X = self.X0t_stack

    # Identity test: `== None` is unreliable for array-like values.
    if h is None:
        h = self.optH

    L, beta, sigma, betaVAR = self.LL(h, X, stack=False, REML=REML)
    q = len(beta)
    last = q - 1
    ts, ps = self.tstat(beta[last], betaVAR[last, last], sigma, q)

    if returnBeta:
        return ts, ps, beta[last].sum(), betaVAR[last, last].sum() * sigma
    return ts, ps
def association(self, X, h=None, stack=True, REML=True, returnBeta=False):
    """
    Calculates association statistics for the SNPs encoded in the vector
    X of size n.

    If stack is True, X is left-multiplied by Kve.T and its first column
    is written into column self.q of X0t_stack, which is then used as the
    covariate matrix; otherwise X is used as given. If h is None, the
    optimal h stored in optH is used.

    The statistic is computed by self.tstat for the last coefficient
    returned by self.LL. Returns (ts, ps), or, when returnBeta is True,
    (ts, ps, last coefficient, its variance entry multiplied by sigma).
    """
    if stack:
        # Overwrites the shared X0t_stack buffer in place.
        self.X0t_stack[:, self.q] = matrixMult(self.Kve.T, X)[:, 0]
        X = self.X0t_stack

    # Identity test: `== None` is unreliable for array-like values.
    if h is None:
        h = self.optH

    L, beta, sigma, betaVAR = self.LL(h, X, stack=False, REML=REML)
    q = len(beta)
    last = q - 1
    ts, ps = self.tstat(beta[last], betaVAR[last, last], sigma, q)

    if returnBeta:
        return ts, ps, beta[last].sum(), betaVAR[last, last].sum() * sigma
    return ts, ps