def initialize_batch(X_bar0, P_bar0, x_bar0):
    """Generate t=0 values for a new iteration from an initial state,
    covariance and a-priori estimate.
    """
    # Get initial state and STM and initialize integrator
    X_bar0_list = X_bar0.T.tolist()[0]
    stm0 = sp.matrix(sp.eye(18))
    stm0_list = sp.eye(18).reshape(1, 324).tolist()[0]
    eom = ode(Udot).set_integrator('dop853', atol=1.0E-10, rtol=1.0E-9)
    eom.set_initial_value(X_bar0_list + stm0_list, 0)

    # Accumulate measurement at t=0
    obs0 = OBS[0]
    stn0 = obs0[0]
    comp0, Htilda0 = Htilda_matrix(X_bar0_list, 0, stn0)
    resid0 = [obs0[1] - float(comp0[0]),
              obs0[2] - float(comp0[1])]
    y0 = sp.matrix([resid0]).T
    H0 = Htilda0 * stm0
    L0 = P_bar0.I + H0.T * W * H0
    N0 = P_bar0.I * x_bar0 + H0.T * W * y0

    return [stm0, comp0, resid0, Htilda0, H0, L0, N0, eom]
def _restore_CF_diag(self, dbg=False):
    nc = self.N_centre

    # Want: r[0 <= n < nc] diagonal
    Ui = sp.eye(self.D[nc], dtype=self.typ)
    for n in xrange(nc, 0, -1):
        self.r[n - 1], Um1, Um1_i = tm.restore_LCF_r(self.A[n], self.r[n], Ui,
                                                     sanity_checks=self.sanity_checks)
        Ui = Um1_i

    # Now U is U_0
    U = Um1
    for s in xrange(self.q[0]):
        self.uni_l.A[0][s] = U.dot(self.uni_l.A[0][s])
        self.uni_l.A[-1][s] = self.uni_l.A[-1][s].dot(Ui)
    self.uni_l.r[-1] = U.dot(self.uni_l.r[-1].dot(U.conj().T))

    # And now: l[nc <= n <= N] diagonal
    if dbg:
        Um1 = sp.eye(self.D[nc - 1], dtype=self.typ)
    else:
        Um1 = mm.eyemat(self.D[nc - 1], dtype=self.typ)  # FIXME: This only works if l[nc - 1] is a special matrix type
    for n in xrange(nc, self.N + 1):
        self.l[n], U, Ui = tm.restore_RCF_l(self.A[n], self.l[n - 1], Um1,
                                            sanity_checks=self.sanity_checks)
        Um1 = U

    # Now, Um1 = U_N
    Um1_i = Ui
    for s in xrange(self.q[0]):
        self.uni_r.A[0][s] = Um1.dot(self.uni_r.A[0][s])
        self.uni_r.A[-1][s] = self.uni_r.A[-1][s].dot(Um1_i)
    self.uni_r.l[-1] = Um1_i.conj().T.dot(self.uni_r.l[-1].dot(Um1_i))
def build_np_models(kernel, trans_samples, ter_samples, ter_rew_samples, lamb):
    Xa, Ra, Xpa = zip(*trans_samples)
    Xa_term, Ra_term = zip(*ter_samples)

    Xa = [np.vstack((xa, xa_term)) if xa_term.size > 0 else xa
          for xa, xa_term in izip(Xa, Xa_term)]
    Ra = [np.hstack((ra, ra_term)) if ra_term.size > 0 else ra
          for ra, ra_term in izip(Ra, Ra_term)]

    k = len(trans_samples)

    # build the K_a,b matrices
    Kab = dict()
    for a, b in product(xrange(k), xrange(k)):
        if Xa_term[b].size > 0:
            Kab[(a, b)] = np.hstack((kernel(Xa[a], Xpa[b]),
                                     np.zeros((Xa[a].shape[0], Xa_term[b].shape[0]))))
        else:
            Kab[(a, b)] = kernel(Xa[a], Xpa[b])

    # build the K_a, D_a matrices
    Ka = [kernel(Xa[i], Xa[i]) for i in xrange(k)]
    Dainv = [Ka[i] + lamb * scipy.eye(*Ka[i].shape) for i in xrange(k)]
    Da = [lu_factor(Dainv[i], overwrite_a=False) for i in xrange(k)]

    # build K_ter matrix
    Kterma = [np.hstack((kernel(ter_rew_samples[0], Xpa[i]),
                         np.zeros((ter_rew_samples[0].shape[0], Xa_term[i].shape[0]))))
              if Xa_term[i].size > 0
              else kernel(ter_rew_samples[0], Xpa[i])
              for i in xrange(k)]
    K_ter = kernel(ter_rew_samples[0], ter_rew_samples[0])
    D_ter = lu_factor(K_ter + lamb * scipy.eye(*K_ter.shape), overwrite_a=True)
    R_ter = ter_rew_samples[1]

    return kernel, Kab, Da, Dainv, Ra, Kterma, D_ter, R_ter, Xa
def GetMat(self, s, sym=False):
    """Return the element transfer matrix for the
    TorsionalSpringDamper element.  If sym=True, 's' must be a
    symbolic string and a matrix of strings will be returned.
    Otherwise, 's' is a numeric value (probably complex) and the
    matrix returned will be complex."""
    N = self.maxsize
    if sym:
        myparams = self.symparams
    else:
        myparams = self.params
    Gc = self.Gc_func(s, myparams)
    Gp = self.Gp_func(s, myparams)
    G_tau = Gp / (1 + Gc * Gp)
    #G_theta_d = Gc*Gp/(1+Gc*Gp)
    if sym:
        maxlen = len(G_tau) + 100
        matout = eye(N, dtype='f')
        matout = matout.astype('S%d' % maxlen)
    else:
        matout = eye(N, dtype='D')
    matout[1, 2] = G_tau
    #matout[1,4] = G_theta_d
    return matout
def test_lowrank_iso(self):
    theta = SP.array(SP.random.randn(2) ** 2)
    theta_hat = SP.exp(2 * theta)

    _K = theta_hat[0] * SP.dot(self.Xtrain, self.Xtrain.T) + theta_hat[1] * SP.eye(self.n_train)
    _Kcross = theta_hat[0] * SP.dot(self.Xtrain, self.Xtest.T)
    _Kgrad_theta = []
    _Kgrad_theta.append(2 * theta_hat[0] * SP.dot(self.Xtrain, self.Xtrain.T))
    _Kgrad_theta.append(2 * theta_hat[1] * SP.eye(self.n_train))

    cov = lowrank.LowRankCF(self.n_dimensions)
    cov.X = self.Xtrain
    cov.Xcross = self.Xtest

    K = cov.K(theta)
    Kcross = cov.Kcross(theta)
    assert SP.allclose(K, _K), 'ouch, covariance matrix is wrong'
    assert SP.allclose(Kcross, _Kcross), 'ouch, cross covariance matrix is wrong'
    assert SP.allclose(_Kgrad_theta[0], cov.Kgrad_theta(theta, 0))
    assert SP.allclose(_Kgrad_theta[1], cov.Kgrad_theta(theta, 1))

    # gradient with respect to latent factors
    for i in range(self.n_dimensions):
        for j in range(self.n_train):
            Xgrad = SP.zeros(self.Xtrain.shape)
            Xgrad[j, i] = 1
            _Kgrad_x = theta_hat[0] * (SP.dot(Xgrad, self.Xtrain.T) + SP.dot(self.Xtrain, Xgrad.T))
            Kgrad_x = cov.Kgrad_x(theta, i, j)
            assert SP.allclose(Kgrad_x, _Kgrad_x), 'ouch, gradient with respect to x is wrong for entry [%d,%d]' % (i, j)
def genInitSigmaFactor(self):
    """Depending on the algorithm settings, we start out with an
    identity matrix, or perturb it."""
    if self.perturbedInitSigma:
        res = mat(eye(self.xdim) * self.initSigmaCoeff
                  + randn(self.xdim, self.xdim) * self.initSigmaRandCoeff)
    else:
        res = mat(eye(self.xdim) * self.initSigmaCoeff)
    return res
def fitPairwiseModel(Y, XX=None, S_XX=None, U_XX=None, verbose=False):
    N, P = Y.shape
    # initializes parameters
    RV = fitSingleTraitModel(Y, XX=XX, S_XX=S_XX, U_XX=U_XX, verbose=verbose)
    Cg = covariance.freeform(2)
    Cn = covariance.freeform(2)
    gp = gp2kronSum(mean(Y[:, 0:2]), Cg, Cn, XX=XX, S_XX=S_XX, U_XX=U_XX)
    conv2 = SP.ones((P, P), dtype=bool)
    rho_g = SP.ones((P, P))
    rho_n = SP.ones((P, P))
    for p1 in range(P):
        for p2 in range(p1):
            if verbose:
                print '.. fitting correlation (%d,%d)' % (p1, p2)
            gp.setY(Y[:, [p1, p2]])
            Cg_params0 = SP.array([SP.sqrt(RV['varST'][p1, 0]), 1e-6 * SP.randn(), SP.sqrt(RV['varST'][p2, 0])])
            Cn_params0 = SP.array([SP.sqrt(RV['varST'][p1, 1]), 1e-6 * SP.randn(), SP.sqrt(RV['varST'][p2, 1])])
            params0 = {'Cg': Cg_params0, 'Cn': Cn_params0}
            conv2[p1, p2], info = OPT.opt_hyper(gp, params0, factr=1e3)
            rho_g[p1, p2] = Cg.K()[0, 1] / SP.sqrt(Cg.K().diagonal().prod())
            rho_n[p1, p2] = Cn.K()[0, 1] / SP.sqrt(Cn.K().diagonal().prod())
            conv2[p2, p1] = conv2[p1, p2]
            rho_g[p2, p1] = rho_g[p1, p2]
            rho_n[p2, p1] = rho_n[p1, p2]
    RV['Cg0'] = rho_g * SP.dot(SP.sqrt(RV['varST'][:, 0:1]), SP.sqrt(RV['varST'][:, 0:1].T))
    RV['Cn0'] = rho_n * SP.dot(SP.sqrt(RV['varST'][:, 1:2]), SP.sqrt(RV['varST'][:, 1:2].T))
    RV['conv2'] = conv2
    #3. regularizes covariance matrices
    offset_g = abs(SP.minimum(LA.eigh(RV['Cg0'])[0].min(), 0)) + 1e-4
    offset_n = abs(SP.minimum(LA.eigh(RV['Cn0'])[0].min(), 0)) + 1e-4
    RV['Cg0_reg'] = RV['Cg0'] + offset_g * SP.eye(P)
    RV['Cn0_reg'] = RV['Cn0'] + offset_n * SP.eye(P)
    RV['params0_Cg'] = LA.cholesky(RV['Cg0_reg'])[SP.tril_indices(P)]
    RV['params0_Cn'] = LA.cholesky(RV['Cn0_reg'])[SP.tril_indices(P)]
    return RV
def GetAugMat(self, s, sym=False):
    """Return the augmented element transfer matrix for the
    AngularVelocitySource element, which includes the velocity source
    portion of 1/s in the augmented column for theta.  If sym=True,
    's' must be a symbolic string and a matrix of strings will be
    returned.  Otherwise, 's' is a numeric value (probably complex)
    and the matrix returned will be complex."""
    N = self.maxsize
    if sym:
        myparams = self.symparams
        matout = eye(N + 1, dtype='f')
        matout = matout.astype('S30')
    else:
        matout = eye(N + 1, dtype='D')
        myparams = self.params
    myrow = 1  # hard coding for now#(self.params['axis']-1)*4+1#axis should be 1, 2, or 3
    fourbyfour = self.GetMat(s, sym=sym)
    matout[0:4, 0:4] = fourbyfour
    Gc = self.Gc_func(s, myparams)
    Gp = self.Gp_func(s, myparams)
    G_theta_d = Gc * Gp / (1 + Gc * Gp)
    matout[myrow, N] = G_theta_d
    return matout
def comp_form_i(sys, obs, K, Ts, Cy=[[1]]):
    """Compact form Controller+Observer+Integral part
    Only for discrete systems!!!

    Call:
    contr = comp_form_i(sys, obs, K, Ts[, Cy])

    Parameters
    ----------
    sys : System in State Space form
    obs : Observer in State Space form
    K : State feedback gains
    Ts : Sampling time
    Cy : feedback matrix to choose the output for integral part

    Returns
    -------
    contr : ss
        Controller
    """
    if sys.Tsamp == 0.0:
        print "comp_form_i works only with discrete systems!"
        return

    ny = shape(sys.C)[0]
    nu = shape(sys.B)[1]
    nx = shape(sys.A)[0]
    no = shape(obs.A)[0]
    ni = shape(Cy)[0]

    B_obsu = mat(obs.B[:, 0:nu])
    B_obsy = mat(obs.B[:, nu:nu + ny])
    D_obsu = mat(obs.D[:, 0:nu])
    D_obsy = mat(obs.D[:, nu:nu + ny])

    k = mat(K)
    nk = shape(k)[1]
    Ke = k[:, nk - ni:]
    K = k[:, 0:nk - ni]
    X = inv(eye(nu, nu) + K * D_obsu)

    a = mat(obs.A)
    c = mat(obs.C)
    Cy = mat(Cy)

    tmp1 = hstack((a - B_obsu * X * K * c, -B_obsu * X * Ke))
    tmp2 = hstack((zeros((ni, no)), eye(ni, ni)))
    A_ctr = vstack((tmp1, tmp2))

    tmp1 = hstack((zeros((no, ni)), -B_obsu * X * K * D_obsy + B_obsy))
    tmp2 = hstack((eye(ni, ni) * Ts, -Cy * Ts))
    B_ctr = vstack((tmp1, tmp2))

    C_ctr = hstack((-X * K * c, -X * Ke))
    D_ctr = hstack((zeros((nu, ni)), -X * K * D_obsy))

    contr = ss(A_ctr, B_ctr, C_ctr, D_ctr, sys.Tsamp)
    return contr
def testSnrFuncs(self):
    """test for signal to noise ratio functions"""
    # trivial
    data_triv = sp.ones((3, 10))
    snr_triv_test = sp.ones(3)
    assert_equal(
        snr_peak(data_triv, 1.0),
        snr_triv_test)
    assert_equal(
        snr_power(data_triv, 1.0),
        snr_triv_test)
    assert_equal(
        snr_maha(data_triv, sp.eye(data_triv.shape[1])),
        snr_triv_test)

    # application
    data = sp.array([
        sp.sin(sp.linspace(0.0, 2 * sp.pi, 100)),
        sp.sin(sp.linspace(0.0, 2 * sp.pi, 100)) * 2,
        sp.sin(sp.linspace(0.0, 2 * sp.pi, 100)) * 5,
    ])
    assert_equal(
        snr_peak(data, 1.0),
        sp.absolute(data).max(axis=1))
    assert_equal(
        snr_power(data, 1.0),
        sp.sqrt((data * data).sum(axis=1) / data.shape[1]))
    assert_almost_equal(
        snr_maha(data, sp.eye(data.shape[1])),
        sp.sqrt((data * data).sum(axis=1) / data.shape[1]))
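# Standalone illustration (not part of the test suite above) of two of the SNR
# definitions exercised by these tests, for a unit sine wave against unit
# noise standard deviation: the peak SNR is max|x| and the power (RMS) SNR is
# roughly 1/sqrt(2).
import scipy as sp

x = sp.sin(sp.linspace(0.0, 2 * sp.pi, 100))
print sp.absolute(x).max()             # peak SNR, ~1.0
print sp.sqrt((x * x).sum() / x.size)  # power (RMS) SNR, ~0.707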
def sample_moments(X, k):
    """Get the sample moments from data"""
    N, d = X.shape

    # Partition X into two halves to independently estimate M2 and M3
    X1, X2 = X[:N / 2], X[N / 2:]

    # Get the moments
    M1 = X1.mean(0)
    M1_ = X2.mean(0)
    M2 = Pairs(X1, X1)
    M3 = lambda theta: TriplesP(X2, X2, X2, theta)
    #M3 = Triples( X2, X2, X2 )

    # TODO: Ah, not computing sigma2!
    # Estimate \sigma^2 = k-th eigenvalue of M2 - mu mu^T
    sigma2 = svdvals(M2 - outer(M1, M1))[k - 1]
    assert sc.isreal(sigma2) and sigma2 > 0

    # P (M_2) is the best rank-k approximation to M2 - sigma^2 I
    P = approxk(M2 - sigma2 * eye(d), k)

    B = matrix_tensorify(eye(d), M1_)
    T = lambda theta: (M3(theta)
                       - sigma2 * (M1_.dot(theta) * eye(d)
                                   + outer(M1_, theta)
                                   + outer(theta, M1_)))
    #T = M3 - sigma2 * ( B + B.swapaxes(2, 1) + B.swapaxes(2, 0) )

    return P, T
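# A self-contained sketch (synthetic data, hypothetical names) of the moment
# identity sample_moments relies on for mixtures of spherical Gaussians: the
# covariance M2 - M1 M1^T equals a rank-(k-1) term plus sigma^2 * I, so its
# k-th singular value recovers the noise variance sigma^2.
import numpy as np
from scipy.linalg import svdvals

np.random.seed(0)
d, k, sigma = 5, 2, 0.3
mus = np.random.randn(k, d)                      # component means
z = np.random.randint(k, size=20000)             # component assignments
X = mus[z] + sigma * np.random.randn(20000, d)   # spherical Gaussian mixture

M1 = X.mean(0)
M2 = X.T.dot(X) / X.shape[0]                     # second moment E[x x^T]
sigma2_hat = svdvals(M2 - np.outer(M1, M1))[k - 1]
print sigma2_hat, sigma ** 2                     # the two should be close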
def __iter__(self):
    dim = self.wrt.shape[0]
    I = scipy.eye(dim)

    # Square root of covariance matrix.
    A = scipy.eye(dim)
    center = self.wrt.copy()
    n_evals = 0

    best_wrt = None
    best_x = float("-inf")

    for i, (args, kwargs) in enumerate(self.args):
        # Draw samples, evaluate and update best solution if a better one
        # was found.
        samples = scipy.random.standard_normal((self.batch_size, dim))
        samples = scipy.dot(samples, A) + center
        fitnesses = [self.f(samples[j], *args, **kwargs)
                     for j in range(samples.shape[0])]
        fitnesses = scipy.array(fitnesses).flatten()
        if fitnesses.max() > best_x:
            best_x = fitnesses.max()
            self.wrt[:] = samples[fitnesses.argmax()]

        # Update center and variances.
        utilities = self.compute_utilities(fitnesses)
        center += scipy.dot(scipy.dot(utilities, samples), A)
        # TODO: vectorize this
        cov_gradient = sum([u * (scipy.outer(s, s) - I)
                            for (s, u) in zip(samples, utilities)])
        update = scipy.linalg.expm2(A * cov_gradient * self.step_rate * 0.5)
        A[:] = scipy.dot(A, update)

        yield dict(loss=-best_x, n_iter=i)
def __init__(self, evaluator, evaluable, **parameters):
    BlackBoxOptimizer.__init__(self, evaluator, evaluable, **parameters)
    self.alphas = ones(self.numberOfCenters) / self.numberOfCenters
    self.mus = []
    self.sigmas = []
    self.tau = 1.
    if self.rangemins is None:
        self.rangemins = -ones(self.xdim)
    if self.rangemaxs is None:
        self.rangemaxs = ones(self.xdim)
    if self.initCovariances is None:
        self.initCovariances = eye(self.xdim)
    if self.elitist and self.numberOfCenters == 1 and not self.noisyEvaluator:
        # in the elitist case separate evaluations are not necessary.
        # CHECKME: maybe in the noisy case?
        self.evalMus = False
    assert not (self.useCauchy and self.numberOfCenters > 1)
    for dummy in range(self.numberOfCenters):
        self.mus.append(rand(self.xdim) * (self.rangemaxs - self.rangemins) + self.rangemins)
        self.sigmas.append(dot(eye(self.xdim), self.initCovariances))
    self.reset()
def __init__(self, evaluator, evaluable, **parameters):
    BlackBoxOptimizer.__init__(self, evaluator, evaluable, **parameters)
    self.numParams = self.xdim + self.xdim * (self.xdim + 1) / 2
    if self.momentum is not None:
        self.momentumVector = zeros(self.numParams)
    if self.learningRateSigma is None:
        self.learningRateSigma = self.learningRate
    if self.rangemins is None:
        self.rangemins = -ones(self.xdim)
    if self.rangemaxs is None:
        self.rangemaxs = ones(self.xdim)
    if self.initCovariances is None:
        if self.diagonalOnly:
            self.initCovariances = ones(self.xdim)
        else:
            self.initCovariances = eye(self.xdim)
    self.x = rand(self.xdim) * (self.rangemaxs - self.rangemins) + self.rangemins
    self.sigma = dot(eye(self.xdim), self.initCovariances)
    self.factorSigma = cholesky(self.sigma)
    self.reset()
def initialize_sequential(X_bar0, P_bar0, x_bar0):
    """Generate t=0 values for a new iteration from an initial state,
    covariance and a-priori estimate.
    """
    # Get initial state and STM and initialize integrator
    X_bar0_list = X_bar0.T.tolist()[0]
    stm0 = sp.matrix(sp.eye(18))
    stm0_list = sp.eye(18).reshape(1, 324).tolist()[0]
    eom = ode(Udot).set_integrator('dop853', atol=1.0E-10, rtol=1.0E-9)
    eom.set_initial_value(X_bar0_list + stm0_list, 0)

    # Perform measurement update for t=0 observation
    obs0 = OBS[0]
    stn0 = obs0[0]
    comp0, Htilda0 = Htilda_matrix(X_bar0_list, 0, stn0)
    resid0 = [obs0[1] - float(comp0[0]),
              obs0[2] - float(comp0[1])]
    y0 = sp.matrix([resid0]).T
    K0 = P_bar0 * Htilda0.T * (Htilda0 * P_bar0 * Htilda0.T + W.I).I
    x_hat0 = x_bar0 + K0 * (y0 - Htilda0 * x_bar0)
    P0 = (I - K0 * Htilda0) * P_bar0
    #P0 = (I - K0 * Htilda0) * P_bar0 * (I - K0 * Htilda0).T + K0 * W.I * K0.T

    return [stm0, comp0, resid0, Htilda0, x_hat0, P0, eom]
def testFill(self):
    """test buffer filling"""
    self.rb.fill(sp.eye(4))
    self.assertEqual(len(self.rb), 6)
    for item in self.rb:
        assert_equal(item, sp.eye(4))
def _LMLgrad_covar_debug(self, covar):
    assert self.N * self.P < 2000, 'gp2kronSum:: N*P>=2000'

    y = SP.reshape(self.Y, (self.N * self.P), order='F')

    K = SP.kron(self.Cg.K(), self.XX)
    K += SP.kron(self.Cn.K() + self.offset * SP.eye(self.P), SP.eye(self.N))

    cholK = LA.cholesky(K).T
    Ki = LA.cho_solve((cholK, True), SP.eye(y.shape[0]))
    Kiy = LA.cho_solve((cholK, True), y)

    if covar == 'Cr':
        n_params = self.Cr.getNumberParams()
    elif covar == 'Cg':
        n_params = self.Cg.getNumberParams()
    elif covar == 'Cn':
        n_params = self.Cn.getNumberParams()

    RV = SP.zeros(n_params)

    for i in range(n_params):
        #0. calc grad_i
        if covar == 'Cg':
            C = self.Cg.Kgrad_param(i)
            Kgrad = SP.kron(C, self.XX)
        elif covar == 'Cn':
            C = self.Cn.Kgrad_param(i)
            Kgrad = SP.kron(C, SP.eye(self.N))
        #1. der of log det
        RV[i] = 0.5 * (Ki * Kgrad).sum()
        #2. der of quad form
        RV[i] -= 0.5 * (Kiy * SP.dot(Kgrad, Kiy)).sum()

    return RV
def compute_diagonal_loading(mat, svd, target_cond=SUFFICIENT_CONDITION,
                             overwrite_mat=False):
    """tries to condition :mat: by imposing a spherical constraint on the
    covariance ellipsoid (adding alpha*eye)

    solves: cond(mat + alpha*I) = target_cond for alpha

    Note: this is a no-op if the condition number is already <= target_cond!

    :type mat: ndarray
    :param mat: input matrix
    :type svd: tuple
    :param svd: return tuple of svd(:mat:) - consistency will not be checked!
    :type target_cond: float
    :param target_cond: condition number to achieve after loading
    :type overwrite_mat: bool
    :param overwrite_mat: if True, operate inplace and overwrite :mat:
    :returns: ndarray - matrix like :mat: conditioned s.t. cond = target_cond
    """
    sv = svd[1]
    if target_cond == 1.0:
        return sp.eye(mat.shape[0], mat.shape[1])
    if target_cond > compute_matrix_cond(sv):
        return mat
    if overwrite_mat is True:
        rval = mat
    else:
        rval = mat.copy()
    alpha = (sv[0] - target_cond * sv[-1]) / (target_cond - 1)
    return rval + alpha * sp.eye(rval.shape[0], rval.shape[1])
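# Usage sketch for compute_diagonal_loading (hypothetical numbers): condition
# an ill-conditioned covariance estimate down to cond = 1e3 and verify the
# resulting condition number from the singular values.
import scipy as sp
import scipy.linalg as sp_la

C = sp.diag(sp.array([1e6, 1.0, 1e-2]))        # cond(C) = 1e8
C_loaded = compute_diagonal_loading(C, sp_la.svd(C), target_cond=1e3)
sv = sp_la.svdvals(C_loaded)
print sv[0] / sv[-1]                            # ~1e3 after loading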
def LMLdebug(self):
    """LML function for debug"""
    assert self.N * self.P < 5000, 'gp2kronSum:: N*P>=5000'

    y = SP.reshape(self.Y, (self.N * self.P), order='F')
    V = SP.kron(SP.eye(self.P), self.F)

    XX = SP.dot(self.Xr, self.Xr.T)
    K = SP.kron(self.Cr.K(), XX)
    K += SP.kron(self.Cn.K() + self.offset * SP.eye(self.P), SP.eye(self.N))

    # inverse of K
    cholK = LA.cholesky(K)
    Ki = LA.cho_solve((cholK, False), SP.eye(self.N * self.P))

    # Areml and inverse
    Areml = SP.dot(V.T, SP.dot(Ki, V))
    cholAreml = LA.cholesky(Areml)
    Areml_i = LA.cho_solve((cholAreml, False), SP.eye(self.K * self.P))

    # effect sizes and z
    b = SP.dot(Areml_i, SP.dot(V.T, SP.dot(Ki, y)))
    z = y - SP.dot(V, b)
    Kiz = SP.dot(Ki, z)

    # lml
    lml = y.shape[0] * SP.log(2 * SP.pi)
    lml += 2 * SP.log(SP.diag(cholK)).sum()
    lml += 2 * SP.log(SP.diag(cholAreml)).sum()
    lml += SP.dot(z, Kiz)
    lml *= 0.5

    return lml
def _maximum_likelihood(self, X):
    n_samples, n_features = X.shape if X.ndim > 1 else (1, X.shape[0])
    n_components = self.n_components

    # Predict mean
    mu = X.mean(axis=0)

    # Predict covariance
    cov = sp.cov(X, rowvar=0)
    eigvals, eigvecs = self._eig_decomposition(cov)
    sigma2 = ((sp.sum(cov.diagonal()) - sp.sum(eigvals)) /
              (n_features - n_components))  # FIXME: M < D?

    weight = sp.dot(eigvecs, sp.diag(sp.sqrt(eigvals - sigma2)))
    M = sp.dot(weight.T, weight) + sigma2 * sp.eye(n_components)
    inv_M = spla.inv(M)

    self.eigvals = eigvals
    self.eigvecs = eigvecs
    self.predict_mean = mu
    self.predict_cov = sp.dot(weight, weight.T) + sigma2 * sp.eye(n_features)
    self.latent_mean = sp.transpose(sp.dot(inv_M, sp.dot(weight.T, X.T - mu[:, sp.newaxis])))
    self.latent_cov = sigma2 * inv_M
    self.sigma2 = sigma2    # FIXME!
    self.weight = weight
    self.inv_M = inv_M

    return self.latent_mean
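# Self-contained sketch of the closed-form probabilistic PCA fit implemented
# above (Tipping & Bishop, 1999): sigma^2 is the mean of the discarded
# eigenvalues and W = U_q (L_q - sigma^2 I)^{1/2}.  Synthetic data and
# hypothetical variable names, not part of the original class.
import scipy as sp
import scipy.linalg as spla

sp.random.seed(1)
X = sp.dot(sp.random.randn(500, 2), sp.random.randn(2, 6)) + 0.1 * sp.random.randn(500, 6)
q = 2
cov = sp.cov(X, rowvar=0)
eigvals, eigvecs = spla.eigh(cov)
eigvals, eigvecs = eigvals[::-1], eigvecs[:, ::-1]   # sort descending
sigma2 = eigvals[q:].mean()                          # isotropic noise estimate
W = sp.dot(eigvecs[:, :q], sp.diag(sp.sqrt(eigvals[:q] - sigma2)))
print sigma2                                          # ~0.01, the noise floor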
def sqrtm3(X):
    M = sp.copy(X)
    m, fb, fe = block_structure(M)
    n = M.shape[0]
    for i in range(0, m):
        M[fb[i]:fe[i], fb[i]:fe[i]] = twobytworoot(M[fb[i]:fe[i], fb[i]:fe[i]])

    for j in range(1, m):
        for i in range(0, m - j):
            JJ = i + j
            Tnoto = M[fb[i]:fe[i], fb[JJ]:fe[JJ]]  # TODO: drop the copy later
            for k in range(i + 1, JJ):
                Tnoto -= (M[fb[i]:fe[i], fb[k]:fe[k]]).dot(M[fb[k]:fe[k], fb[JJ]:fe[JJ]])

            if (M[fb[i]:fe[i], fb[JJ]:fe[JJ]]).shape == (1, 1):
                # 1x1 blocks: U_ij = Tnoto / (U_ii + U_jj)
                M[fb[i]:fe[i], fb[JJ]:fe[JJ]] = Tnoto / (M[fb[i]:fe[i], fb[i]:fe[i]]
                                                         + M[fb[JJ]:fe[JJ], fb[JJ]:fe[JJ]])
            else:
                # General blocks: solve the Sylvester equation
                # Uii * X + X * Ujj = Tnoto via the Kronecker form
                # (I (x) Uii + Ujj^T (x) I) vec(X) = vec(Tnoto).
                #M[fb[i]:fe[i],fb[JJ]:fe[JJ]] = sp.linalg.solve_sylvester(Uii, Ujj, Tnoto)
                Uii = M[fb[i]:fe[i], fb[i]:fe[i]]
                Ujj = M[fb[JJ]:fe[JJ], fb[JJ]:fe[JJ]]
                shapeUii = Uii.shape[0]
                shapeUjj = Ujj.shape[0]
                Tnoto = Tnoto.reshape((shapeUii * shapeUjj), 1, order="F")
                M[fb[i]:fe[i], fb[JJ]:fe[JJ]] = \
                    linalg.solve(sp.kron(sp.eye(shapeUjj), Uii) + sp.kron(Ujj.T, sp.eye(shapeUii)),
                                 Tnoto).reshape(shapeUii, shapeUjj, order="F")
    return M
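# Verification sketch for sqrtm3 (assumes the module's block_structure and
# twobytworoot helpers are available): feed it the real Schur factor of a
# matrix whose spectrum lies in the right half-plane, the upper
# quasi-triangular input it expects, and check that the result squares back.
import scipy as sp
import scipy.linalg as linalg

sp.random.seed(0)
A = sp.random.randn(6, 6) + 6 * sp.eye(6)   # shift spectrum into Re > 0
T, Z = linalg.schur(A, output='real')
S = sqrtm3(T)
print sp.allclose(S.dot(S), T)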
def __init__(self, basef,
             translate=True,
             rotate=False,
             conditioning=None,
             asymmetry=None,
             oscillate=False,
             penalize=None,
             ):
    FunctionEnvironment.__init__(self, basef.xdim, basef.xopt)
    self.desiredValue = basef.desiredValue
    self.toBeMinimized = basef.toBeMinimized

    if translate:
        self.xopt = (rand(self.xdim) - 0.5) * 9.8

    self._diags = eye(self.xdim)
    self._R = eye(self.xdim)
    self._Q = eye(self.xdim)

    if conditioning is not None:
        self._diags = generateDiags(conditioning, self.xdim)
    if rotate:
        self._R = orth(rand(basef.xdim, basef.xdim))
        if conditioning:
            self._Q = orth(rand(basef.xdim, basef.xdim))

    tmp = lambda x: dot(self._Q, dot(self._diags, dot(self._R, x - self.xopt)))
    if asymmetry is not None:
        tmp2 = tmp
        tmp = lambda x: asymmetrify(tmp2(x), asymmetry)
    if oscillate:
        tmp3 = tmp
        tmp = lambda x: oscillatify(tmp3(x))

    self.f = lambda x: basef.f(tmp(x))
def GetMat(self, s, sym=False):
    """Return the element transfer matrix for the
    TorsionalSpringDamper element.  If sym=True, 's' must be a
    symbolic string and a matrix of strings will be returned.
    Otherwise, 's' is a numeric value (probably complex) and the
    matrix returned will be complex."""
    N = self.maxsize
    if sym:
        myparams = self.symparams
    else:
        myparams = self.params
    k = myparams['k']
    c = myparams['c']
    springterm = 1 / (k[0] + c[0] * s)
    if sym:
        maxlen = len(springterm) + 10
        matout = eye(N, dtype='f')
        matout = matout.astype('S%d' % maxlen)
    else:
        matout = eye(N, dtype='D')
    matout[1, 2] = springterm
    if max(shape(k)) > 1 and self.maxsize >= 8:
        matout[5, 6] = 1 / (k[1] + c[1] * s)
    if max(shape(k)) > 2 and self.maxsize >= 12:
        matout[9, 10] = 1 / (k[2] + c[2] * s)
    return matout
def GetAugMat(self, s, sym=False):
    """Return the augmented element transfer matrix for the
    AVSwThetaFB element.  If sym=True, 's' must be a symbolic string
    and a matrix of strings will be returned.  Otherwise, 's' is a
    numeric value (probably complex) and the matrix returned will be
    complex."""
    N = self.maxsize
    if N % 2:
        N = N - 1
    if sym:
        myparams = self.symparams
        matout = eye(N + 1, dtype="f")
        matout = matout.astype("S30")
    else:
        matout = eye(N + 1, dtype="D")
        myparams = self.params
    myrow = (self.params["axis"] - 1) * 4 + 1  # axis should be 1, 2, or 3
    mycol = myrow + 1
    Gc = myparams["Gc"]
    gain = myparams["Ka"]
    c = myparams["c"]
    k = myparams["ks"]
    if myparams.has_key("tau"):
        tau = myparams["tau"]
        Gp = gain * tau / (s * (s + tau))
    else:
        Gp = gain / s
    actpart = Gc * Gp / (Gc * Gp + 1.0)
    flexpart = 1.0 / ((Gc * Gp + 1) * (c * s + k))
    matout[myrow, N] = actpart
    matout[myrow, mycol] = flexpart
    return matout
def find_homog_trans(points_a, points_b, err_threshold=0, rot_0=None):
    """Finds a homogeneous transformation matrix that, when applied to
    the points in points_a, minimizes the squared Euclidean distance
    between the transformed points and the corresponding points in
    points_b.  Both points_a and points_b are (n, 3) arrays.
    """
    # Align the centroids of the two point clouds
    cent_a = sp.average(points_a, axis=0)
    cent_b = sp.average(points_b, axis=0)
    points_a = points_a - cent_a
    points_b = points_b - cent_b

    # Define the error as a function of a rotation vector in R^3
    rot_cost = lambda rot: (sp.dot(vec_to_rot(rot), points_a.T).T
                            - points_b).flatten() ** 2

    # Run the optimization
    if rot_0 is None:
        rot_0 = sp.zeros(3)
    rot = opt.leastsq(rot_cost, rot_0)[0]

    # Compute the final homogeneous transformation matrix
    homog_1 = sp.eye(4)
    homog_1[0:3, 3] = -cent_a
    homog_2 = sp.eye(4)
    homog_2[0:3, 0:3] = vec_to_rot(rot)
    homog_3 = sp.eye(4)
    homog_3[0:3, 3] = cent_b
    homog = sp.dot(homog_3, sp.dot(homog_2, homog_1))
    return homog, rot
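# Usage sketch (assumes the module's vec_to_rot helper, which maps a rotation
# vector in R^3 to a 3x3 rotation matrix): recover a known rigid transform
# from noiseless point correspondences and verify it in homogeneous
# coordinates.
import scipy as sp

sp.random.seed(0)
points_a = sp.random.randn(50, 3)
R = vec_to_rot(sp.array([0.1, -0.2, 0.3]))
points_b = sp.dot(R, points_a.T).T + sp.array([1.0, 2.0, 3.0])
homog, rot = find_homog_trans(points_a, points_b)
pa_h = sp.hstack((points_a, sp.ones((50, 1))))     # homogeneous coordinates
print sp.allclose(sp.dot(homog, pa_h.T).T[:, :3], points_b, atol=1e-5)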
def GetAugMat(self, s, sym=False):
    """Return the augmented element transfer matrix for the AVS1_kp
    element."""
    N = self.maxsize
    if sym:
        myparams = self.symparams
        matout = eye(N + 1, dtype="f")
        matout = matout.astype("S30")
    else:
        matout = eye(N + 1, dtype="D")
        myparams = self.params
    myrow = 1  # hard coding for now#(self.params['axis']-1)*4+1#axis should be 1, 2, or 3
    Gact = self.Gact_func(s, self.params)
    Gth = self.kp
    k_spring = self.params["k_spring"]
    c_spring = self.params["c_spring"]
    H = self.params["H"]
    term1 = 1.0 / ((1.0 + Gact * Gth * H) * (k_spring + c_spring * s))
    term2 = Gact * Gth / (1.0 + Gact * Gth * H)
    #term1 = 1.0/(k_spring + c_spring*s + Gact*Gth*k_spring + Gact*Gth*c_spring*s)
    #term2 = Gact*Gth/(1.0 + Gact*Gth)
    matout[myrow, 2] = term1
    matout[myrow, N] = term2
    return matout
def xNES(f, x0, maxEvals=1e6, verbose=False, targetFitness=-1e-10):
    """Exponential NES (xNES), as described in
    Glasmachers, Schaul, Sun, Wierstra and Schmidhuber (GECCO'10).
    Maximizes a function f.
    Returns (best solution found, corresponding fitness).
    """
    dim = len(x0)
    I = eye(dim)
    learningRate = 0.6 * (3 + log(dim)) / dim / sqrt(dim)
    batchSize = 4 + int(floor(3 * log(dim)))
    center = x0.copy()
    A = eye(dim)  # sqrt of the covariance matrix
    numEvals = 0
    bestFound = None
    bestFitness = -Inf
    while numEvals + batchSize <= maxEvals and bestFitness < targetFitness:
        # produce and evaluate samples
        samples = [randn(dim) for _ in range(batchSize)]
        fitnesses = [f(dot(A, s) + center) for s in samples]
        if max(fitnesses) > bestFitness:
            bestFitness = max(fitnesses)
            # store the solution in task space, not the raw sample
            bestFound = dot(A, samples[argmax(fitnesses)]) + center
        numEvals += batchSize
        if verbose:
            print "Step", numEvals / batchSize, ":", max(fitnesses), "best:", bestFitness
        #print A

        # update center and variances
        utilities = computeUtilities(fitnesses)
        center += dot(A, dot(utilities, samples))
        covGradient = sum([u * (outer(s, s) - I) for (s, u) in zip(samples, utilities)])
        A = dot(A, expm2(0.5 * learningRate * covGradient))

    return bestFound, bestFitness
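# Usage sketch: maximize a concave quadratic with xNES (assumes the module's
# computeUtilities helper and the numpy/scipy names used above are already
# imported).  The optimum is at x == 1.
f = lambda x: -sum((x - 1.0) ** 2)
best, fit = xNES(f, ones(5) * 5.0, maxEvals=50000, targetFitness=-1e-8)
print best, fit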
def _additionalInit(self):
    assert self.numberOfCenters == 1, 'Mixtures of Gaussians not supported yet.'
    xdim = self.numParameters
    self.alphas = ones(self.numberOfCenters) / float(self.numberOfCenters)
    self.mus = []
    self.sigmas = []

    if self.rangemins is None:
        self.rangemins = -ones(xdim)
    if self.rangemaxs is None:
        self.rangemaxs = ones(xdim)
    if self.initCovariances is None:
        if self.diagonalOnly:
            self.initCovariances = ones(xdim)
        else:
            self.initCovariances = eye(xdim)

    for _ in range(self.numberOfCenters):
        self.mus.append(rand(xdim) * (self.rangemaxs - self.rangemins) + self.rangemins)
        self.sigmas.append(dot(eye(xdim), self.initCovariances))

    self.samples = list(range(self.windowSize))
    self.fitnesses = zeros(self.windowSize)
    self.generation = 0
    self.allsamples = []
    self.muevals = []
    self.allmus = []
    self.allsigmas = []
    self.allalphas = []
    self.allUpdateSizes = []
    self.allfitnesses = []
    self.meanShifts = [zeros((self.numParameters)) for _ in range(self.numberOfCenters)]
    self._oneEvaluation(self._initEvaluable)
def getVW(self, A):
    # V --> m, W --> n
    MLU = linalg.lu_factor(c_[r_[A, transpose(self.data.C)],
                              r_[self.data.B, self.data.D]])
    V = linalg.lu_solve(MLU, r_[zeros((self.data.n, self.data.q), Float),
                                eye(self.data.q)])
    W = linalg.lu_solve(MLU, r_[zeros((self.data.m, self.data.p), Float),
                                eye(self.data.p)], trans=1)
    return V, W
def train(self, x, y, mu=None, sig=None):
    # Initialization
    n = y.shape[0]
    C = int(y.max())
    eps = sp.finfo(sp.float64).eps
    if (mu is None) and (self.mu is None):
        self.mu = 10 ** (-7)
    elif self.mu is None:
        self.mu = mu
    if (sig is None) and (self.sig is None):
        self.sig = 0.5
    elif self.sig is None:
        self.sig = sig

    # Compute K and G
    K = KERNEL()
    K.compute_kernel(x, sig=self.sig)
    G = KERNEL()
    G.K = self.mu * sp.eye(n)
    for i in range(C):
        t = sp.where(y == (i + 1))[0]
        self.ni.append(sp.size(t))
        self.prop.append(float(self.ni[i]) / n)

        # Compute K_k
        Ki = KERNEL()
        Ki.compute_kernel(x, z=x[t, :], sig=self.sig)
        T = sp.eye(self.ni[i]) - sp.ones((self.ni[i], self.ni[i]))
        Ki.K = sp.dot(Ki.K, T)
        del T
        G.K += sp.dot(Ki.K, Ki.K.T) / self.ni[i]
    G.scale_kernel(C)

    # Solve the generalized eigenvalue problem
    a, A = linalg.eigh(G.K, b=K.K)
    idx = a.argsort()[::-1]
    a = a[idx]
    A = A[:, idx]

    # Remove negative eigenvalues
    t = sp.where(a > eps)[0]
    a = a[t]
    A = A[:, t]

    # Normalize the eigenvectors
    for i in range(a.size):
        A[:, i] /= sp.sqrt(sp.dot(sp.dot(A[:, i].T, K.K), A[:, i]))

    # Update model
    self.a = a.copy()
    self.A = A.copy()
    self.S = sp.dot(sp.dot(self.A, sp.diag(self.a ** (-1))), self.A.T)

    # Free memory
    del G, K, a, A
def Example1():
    a = sp.array([[1, 2, 3], [4, 5, 6]])
    b = sp.array([i * i for i in range(100) if i % 2 == 1])
    c = b.tolist()
    print a, b, c

    a = sp.zeros(100)                 # a 100-element array of float zeros
    b = sp.zeros((2, 8), int)         # a 2x8 array of int zeros
    c = sp.zeros((2, 2, 2), complex)  # a 2x2x2 array of complex zeros
    print a, b, c

    a = sp.ones(10, int)              # a 10-element array of int ones
    b = sp.pi * sp.ones((5, 5))       # a useful way to fill up an array with a specified value
    print a, b

    id = sp.eye(10, 10, dtype=int)    # 10x10 identity matrix (1's on diagonal)
    offdiag = sp.eye(10, 10, 1) + sp.eye(10, 10, -1)  # off-diagonal elements = 1
    print id, offdiag

    a = sp.array([[1, 2, 3], [4, 5, 6]])
    b = sp.transpose(a)               # reverse dimensions of a (even for dim > 2)
    b = a.T                           # equivalent to scipy.transpose(a)
    c = sp.swapaxes(a, 0, 1)          # swap specified axes
    print a, b, c

    a = sp.arange(1, 10, 1)           # like Python range, but with (potentially) real-valued arrays
    b = sp.linspace(1, 10, 11)        # create array of equally-spaced points based on specified number of points
    print a, b

    a = sp.random.random((100, 100))  # 100x100 array of floats uniform on [0.,1.)
    b = sp.random.randint(0, 10, (100,))  # 100 random ints uniform on [0, 10), i.e., not including the upper bound 10
    c = sp.random.standard_normal((5, 5, 5))  # zero-mean, unit-variance Gaussian random numbers in a 5x5x5 array
    print a, b, c

    elem = c[1, 1, 1]                 # equiv. to c[1][1][1] but presumably more efficient
    print elem

    i = sp.array([0, 1, 2, 1])        # array of indices for the first axis
    j = sp.array([1, 2, 3, 4])        # array of indices for the second axis
    print a[i, j]                     # returns array([a[0,1], a[1,2], a[2,3], a[1,4]])

    c = sp.linspace(1, 10, 11)
    b = c > 5                         # boolean mask array, same shape as c
    print c[b]

    last_elem = c[-1]                 # the last element of the array
    print last_elem

    section = a[10:20, 30:40]         # 10x10 subblock starting at [10,30]
    print section

    asection = a[10:, 30:]            # missing stop index implies until end of array
    bsection = a[:10, :30]            # missing start index implies from start of array
    print asection, bsection

    x = a[:, 0]                       # get everything in the 0th column (missing start and stop)
    y = a[:, 1]                       # get everything in the 1st column
    print x, y

    s = sp.sum(a)                     # sum all elements in a, returning a scalar
    s0 = sp.sum(a, axis=0)            # sum elements along specified axis (=0), returning an array of remaining shape
    a = sp.ones((10, 20, 30))
    a0 = sp.sum(a, axis=0)            # a0 has shape (20,30)
    print s, s0, a, a0

    a = sp.array([[1, 2, 3], [4, 5, 6]])
    m = sp.mean(a, axis=0)            # compute mean along the specified axis (over entire array if axis=None)
    s = sp.std(a, axis=0)             # compute standard deviation along the specified axis (over entire array if axis=None)
    print m, s

    s0 = sp.cumsum(a, axis=0)         # cumulatively sum over the 0 axis, returning array with same shape as a
    s1 = sp.cumsum(a)                 # cumulatively sum over the flattened array, returning 1D array of length shape[0]*shape[1]*...*shape[dim-1]
    print s0, s1
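# A short companion example in the same spirit: basic linear algebra with
# scipy.linalg (solve a system, verify the solution, check an inverse).
def Example2():
    import scipy.linalg as la
    A = 2 * sp.eye(3) + sp.eye(3, 3, 1)   # upper bidiagonal system matrix
    b = sp.ones(3)
    x = la.solve(A, b)                    # solve A x = b
    print x
    print sp.allclose(sp.dot(A, x), b)    # verify the solution
    print sp.allclose(sp.dot(la.inv(A), A), sp.eye(3))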
def run(args):
    # Load gene-index map
    arrs = [l.rstrip().split() for l in open(args.gene_index_file)]
    index2gene = dict((int(arr[0]), arr[1]) for arr in arrs)

    G = nx.Graph()
    G.add_nodes_from(index2gene.values())  # in case any nodes have degree zero

    # Load graph
    print "* Loading PPI..."
    edges = [map(int, l.rstrip().split()[:2]) for l in open(args.edgelist_file)]
    G.add_edges_from([(index2gene[u], index2gene[v]) for u, v in edges])
    print "\t- Edges:", len(G.edges())
    print "\t- Nodes:", len(G.nodes())

    # Remove self-loops and restrict to largest connected component
    print "* Removing self-loops, multi-edges, and restricting to",
    print "largest connected component..."
    self_loops = [(u, v) for u, v in G.edges() if u == v]
    G.remove_edges_from(self_loops)
    G = G.subgraph(sorted(nx.connected_components(G), key=lambda cc: len(cc),
                          reverse=True)[0])
    nodes = sorted(G.nodes())
    n = len(nodes)
    print "\t- Largest CC Edges:", len(G.edges())
    print "\t- Largest CC Nodes:", len(G.nodes())

    # Set up output directory
    print "* Saving updated graph to file..."
    os.system('mkdir -p ' + args.output_dir)
    output_prefix = "{}/{}".format(args.output_dir, args.prefix)
    pprfile = "{}_ppr_{:g}.mat".format(output_prefix, args.alpha)

    # Index mapping for genes
    index_map = ["{} {}".format(i + args.start_index, nodes[i]) for i in range(n)]
    open("{}_index_genes".format(output_prefix), 'w').write("\n".join(index_map))

    # Edge list
    edges = [sorted([nodes.index(u) + args.start_index,
                     nodes.index(v) + args.start_index])
             for u, v in G.edges()]
    edgelist = ["{} {} 1".format(u, v) for u, v in edges]
    open("{}_edge_list".format(output_prefix), 'w').write("\n".join(edgelist))

    ## Create the PPR matrix either using Scipy or MATLAB
    # Create "walk" matrix (normalized adjacency matrix)
    print "* Creating PPR matrix..."
    W = nx.to_numpy_matrix(G, nodelist=nodes, dtype='f')
    W = W / W.sum(axis=1)  # normalization step

    if not args.matlab:
        ## Create PPR matrix using Python
        from scipy.linalg import inv
        PPR = (1.0 - args.alpha) * inv(sp.eye(n) - args.alpha * sp.transpose(W))
        scipy.io.savemat(pprfile, dict(PPR=PPR), oned_as='column')
    else:
        ## Create PPR matrix using MATLAB
        # Set up a params file
        params = dict(W=W, outputfile=pprfile, alpha=args.alpha)
        scipy.io.savemat("params.mat", params, oned_as='column')
        # Run the MATLAB script, then clean up the params file
        os.system('matlab -nojvm -nodisplay -nodesktop -nosplash < createPPRMat.m')
        os.system('rm params.mat')
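# Self-contained sketch of the same personalized PageRank closed form used
# above, on a toy 3-node path graph (hypothetical inputs; alpha chosen
# arbitrarily):
#     PPR = (1 - alpha) * (I - alpha * W^T)^{-1}
# Each column of the result is a probability distribution, so it sums to 1.
import networkx as nx
import scipy as sp
from scipy.linalg import inv

G = nx.path_graph(3)
W = nx.to_numpy_matrix(G, dtype='f')
W = W / W.sum(axis=1)                     # row-normalized walk matrix
alpha = 0.6
PPR = (1.0 - alpha) * inv(sp.eye(3) - alpha * sp.transpose(W))
print PPR.sum(axis=0)                     # ~[1, 1, 1]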
    return min(1, (1 + c) * eta_sigma)


if __name__ == '__main__':
    ''' Example of usage code '''
    import time

    np.random.seed(42)
    random.seed(42)

    def f(x):
        # sin(x^2+y^2)/(x^2+y^2)
        r = sum(square(x))
        return sp.sin(r) / r

    mu = array([9999., -9999.])  # a bad init guess
    amat = eye(2)

    # when using adasam, use conservative eta
    xnes = XNES(f, mu, amat, npop=50, use_adasam=True, eta_bmat=0.01,
                eta_sigma=.1, patience=9999)
    t0 = time.time()
    for i in range(20):
        xnes.step(100)
        print "Current: ({},{})".format(*xnes.mu)
def _X(self):
    L = la.cholesky(self._UU()).T
    Li = la.inv(L)
    M = la.cholesky(sp.dot(L.T, L) + sp.eye(L.shape[0])).T
    return sp.dot(Li.T, sp.dot(M - sp.eye(M.shape[0]), Li))
@cached('Yres')
def Yres(self):
    return self.Y - self.predict_in_sample()

@cached('Yres')
def yres(self):
    r = vec(self.Yres())
    if self._miss:
        r = r[~self._veIok]
    return r


if __name__ == '__main__':
    # define phenotype
    N = 1000
    P = 4
    Y = sp.randn(N, P)

    # define fixed effects
    F = []
    A = []
    F.append(sp.randn(N, 3))
    F.append(sp.randn(N, 2))
    A.append(sp.eye(P))
    A.append(sp.ones((1, P)))

    pdb.set_trace()
    mean = MeanKronSum(Y, F, A)
def simple_interaction_kronecker(snps,
                                 phenos,
                                 covs=None,
                                 Acovs=None,
                                 Asnps1=None,
                                 Asnps0=None,
                                 K1r=None,
                                 K1c=None,
                                 K2r=None,
                                 K2c=None,
                                 covar_type='lowrank_diag',
                                 rank=1,
                                 NumIntervalsDelta0=100,
                                 NumIntervalsDeltaAlt=0,
                                 searchDelta=False):
    """
    I-variate fixed effects interaction test for phenotype specific SNP effects

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:   list of SP.arrays holding covariates. Each covs[i] has one
                corresponding Acovs[i]
        Acovs:  list of SP.arrays holding the phenotype design matrices for
                covariates. Each covs[i] has one corresponding Acovs[i].
        Asnps1: list of SP.arrays of I interaction variables to be tested for
                N individuals. Note that it is assumed that Asnps0 is already
                included. If not provided, the alternative model will be the
                independent model
        Asnps0: single SP.array of I0 interaction variables to be included in
                the background model when testing for interaction with Inters
        K1r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K1c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K2r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K2c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        covar_type: type of covariance to use. Default 'freeform'. possible values are
                'freeform':     free form optimization,
                'fixed':        use a fixed matrix specified in covar_K0,
                'diag':         optimize a diagonal matrix,
                'lowrank':      optimize a low rank matrix. The rank of the
                                lowrank part is specified in the variable rank,
                'lowrank_id':   optimize a low rank matrix plus the weight of a
                                constant diagonal matrix. The rank of the
                                lowrank part is specified in the variable rank,
                'lowrank_diag': optimize a low rank matrix plus a free diagonal
                                matrix. The rank of the lowrank part is
                                specified in the variable rank,
                'block':        optimize the weight of a constant P x P block
                                matrix of ones,
                'block_id':     optimize the weight of a constant P x P block
                                matrix of ones plus the weight of a constant
                                diagonal matrix,
                'block_diag':   optimize the weight of a constant P x P block
                                matrix of ones plus a free diagonal matrix,
        rank:   rank of a possible lowrank component (default 1)
        NumIntervalsDelta0:  number of steps for delta optimization on the null
                model (100)
        NumIntervalsDeltaAlt: number of steps for delta optimization on the
                alt. model (0 - no optimization)
        searchDelta: Carry out delta optimization on the alternative model?
                If yes, we use NumIntervalsDeltaAlt steps

    Returns:
        pv:     P-values of the interaction test
        pv0:    P-values of the null model
        pvAlt:  P-values of the alternative model
    """
    S = snps.shape[1]
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'
    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    # Asnps can be several designs
    if Asnps0 is None:
        Asnps0 = [SP.ones([1, P])]
    if Asnps1 is None:
        Asnps1 = [SP.eye(P)]
    if type(Asnps0) != list:
        Asnps0 = [Asnps0]
    if type(Asnps1) != list:
        Asnps1 = [Asnps1]
    assert (len(Asnps0) == 1) and (len(Asnps1) > 0), \
        "need at least one SNP design matrix for null and alt model"

    # one row per column design matrix
    pv = SP.zeros((len(Asnps1), snps.shape[1]))
    lrt = SP.zeros((len(Asnps1), snps.shape[1]))
    pvAlt = SP.zeros((len(Asnps1), snps.shape[1]))
    lrtAlt = SP.zeros((len(Asnps1), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c,
                                     K2c=K2c, covs=covs, Acovs=Acovs,
                                     covar_type=covar_type, rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'

    #2. run kroneckerLMM for null model
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    # add covariates
    for ic in range(len(Acovs)):
        lmm.addCovariates(covs[ic], Acovs[ic])
    lmm.setPheno(phenos)

    # delta search on alt. model?
    if searchDelta:
        lmm.setNumIntervalsAlt(NumIntervalsDeltaAlt)
        lmm.setNumIntervals0_inter(NumIntervalsDeltaAlt)
    else:
        lmm.setNumIntervalsAlt(0)
        lmm.setNumIntervals0_inter(0)
    lmm.setNumIntervals0(NumIntervalsDelta0)

    # add SNP design
    lmm.setSNPcoldesign0_inter(Asnps0[0])
    for iA in range(len(Asnps1)):
        lmm.setSNPcoldesign(Asnps1[iA])
        lmm.process()
        pvAlt[iA, :] = lmm.getPv()[0]
        pv[iA, :] = lmm.getPv()[1]
        pv0 = lmm.getPv()[2]
    return pv, pv0, pvAlt
# Construct the augmented matrix
C = np.concatenate((A, b), 1)
# QR decomposition (the coefficient matrix has fixed columns)
[Q, R] = Sci.linalg.qr(C)
CR = R[1:, 1:]
# Singular value decomposition (SVD)
U1, S1, Vh = Sci.linalg.svd(CR)
N1 = Vh.T
# Compute the LSTLS parameters
a0 = np.matrix(0)
a1 = np.concatenate((a0, Sci.zeros((1, n - 1))), 1)
a2 = np.concatenate((Sci.zeros((n - 1, 1)), Sci.eye(n - 1)), 1)
a3 = np.concatenate((a1, a2), 0)
xLSTLS = (A.H * A - np.power(S1[n - 1, ], 2) * a3).I * A.H * b
# Compute the LSTLS residual matrix
test1 = U1[0:m - 1, n - 1].reshape(-1, 1)
test2 = N1[n - 1, 0:].reshape(1, -1)
test3 = Sci.zeros((m - 1, 1))
test4 = test1 * S1[n - 1, ] * test2
b1 = np.concatenate((a0, Sci.zeros((1, n))), 1)
b2 = np.concatenate((test3, test4), 1)
b3 = np.concatenate((b1, b2), 0)
CCLSTLS = Q.conj().T * b3
def forward_lmm_kronecker(snps,
                          phenos,
                          Asnps=None,
                          Acond=None,
                          K1r=None,
                          K1c=None,
                          K2r=None,
                          K2c=None,
                          covs=None,
                          Acovs=None,
                          threshold=5e-8,
                          maxiter=2,
                          qvalues=False,
                          update_covariances=False,
                          **kw_args):
    """
    Kronecker fixed effects test with forward selection

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x P] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        covs:   [N x D] SP.array of D covariates for N individuals
        threshold: (float) P-value threshold for inclusion in forward selection
                (default 5e-8)
        maxiter: (int) maximum number of interaction scans. First scan is
                without inclusion, so maxiter-1 inclusions can be performed.
                (default 2)
        qvalues: Use q-value threshold and return q-values in addition
                (default False)
        update_covariances: Boolean indicator if covariances should be
                re-estimated after each forward step (default False)

    Returns:
        lm:     limix LMM object
        resultStruct with elements:
            iadded:  array of indices of SNPs included in order of inclusion
            pvadded: array of P-values obtained by the included SNPs in the
                     iteration before inclusion
            pvall:   [maxiter x S] SP.array of P-values for all iterations
        Optional: corresponding q-values
            qvadded
            qvall
    """
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'
    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if type(Asnps) != list:
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one SNP design matrix"

    if Acond is None:
        Acond = Asnps
    if type(Acond) != list:
        Acond = [Acond]
    assert len(Acond) > 0, "need at least one SNP design matrix"

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c,
                                     K2c=K2c, covs=covs, Acovs=Acovs, **kw_args)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        vc = None
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'

    t0 = time.time()
    lm, pv = kronecker_lmm(snps=snps, phenos=phenos, Asnps=Asnps, K1r=K1r,
                           K2r=K2r, K1c=K1c, K2c=K2c, covs=covs, Acovs=Acovs)

    # get pv
    # start stuff
    iadded = []
    pvadded = []
    qvadded = []
    time_el = []
    pvall = SP.zeros((pv.shape[0] * maxiter, pv.shape[1]))
    qvall = None
    t1 = time.time()
    print(("finished GWAS testing in %.2f seconds" % (t1 - t0)))
    time_el.append(t1 - t0)
    pvall[0:pv.shape[0], :] = pv
    imin = SP.unravel_index(pv.argmin(), pv.shape)
    score = pv[imin].min()
    niter = 1
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv[imin]

    # loop:
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        pvadded.append(pv[imin])
        iadded.append(imin)
        if qvalues:
            qvadded.append(qv[imin])
        if update_covariances and vc is not None:
            vc.addFixedTerm(snps[:, imin[1]:(imin[1] + 1)], Acond[imin[0]])
            vc.setScales()  # CL: don't know what this does, but findLocalOptima crashes because vc.noisPos=None
            vc.findLocalOptima(fast=True)
            K1c = vc.getEstTraitCovar(0)
            K2c = vc.getEstTraitCovar(1)
            lm.setK1c(K1c)
            lm.setK2c(K2c)
        lm.addCovariates(snps[:, imin[1]:(imin[1] + 1)], Acond[imin[0]])
        for i in range(len(Asnps)):
            # add SNP design
            lm.setSNPcoldesign(Asnps[i])
            lm.process()
            pv[i, :] = lm.getPv()[0]
        pvall[niter * pv.shape[0]:(niter + 1) * pv.shape[0]] = pv
        imin = SP.unravel_index(pv.argmin(), pv.shape)
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter + 1, :] = qv
            score = qv[imin].min()
        else:
            score = pv[imin].min()
        t1 = time.time()
        print(("finished GWAS testing in %.2f seconds" % (t1 - t0)))
        time_el.append(t1 - t0)
        niter = niter + 1

    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = pvall
    RV['time_el'] = time_el
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
def kronecker_lmm(snps,
                  phenos,
                  covs=None,
                  Acovs=None,
                  Asnps=None,
                  K1r=None,
                  K1c=None,
                  K2r=None,
                  K2c=None,
                  covar_type='lowrank_diag',
                  rank=1,
                  NumIntervalsDelta0=100,
                  NumIntervalsDeltaAlt=0,
                  searchDelta=False):
    """
    simple wrapper for kroneckerLMM code

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:   list of SP.arrays holding covariates. Each covs[i] has one
                corresponding Acovs[i]
        Acovs:  list of SP.arrays holding the phenotype design matrices for
                covariates. Each covs[i] has one corresponding Acovs[i].
        Asnps:  single SP.array of I0 interaction variables to be included in
                the background model when testing for interaction with Inters
                If not provided, the alternative model will be the independent
                model
        K1r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K1c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K2r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K2c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        covar_type: type of covariance to use. Default 'freeform'. possible values are
                'freeform':     free form optimization,
                'fixed':        use a fixed matrix specified in covar_K0,
                'diag':         optimize a diagonal matrix,
                'lowrank':      optimize a low rank matrix. The rank of the
                                lowrank part is specified in the variable rank,
                'lowrank_id':   optimize a low rank matrix plus the weight of a
                                constant diagonal matrix. The rank of the
                                lowrank part is specified in the variable rank,
                'lowrank_diag': optimize a low rank matrix plus a free diagonal
                                matrix. The rank of the lowrank part is
                                specified in the variable rank,
                'block':        optimize the weight of a constant P x P block
                                matrix of ones,
                'block_id':     optimize the weight of a constant P x P block
                                matrix of ones plus the weight of a constant
                                diagonal matrix,
                'block_diag':   optimize the weight of a constant P x P block
                                matrix of ones plus a free diagonal matrix,
        rank:   rank of a possible lowrank component (default 1)
        NumIntervalsDelta0:  number of steps for delta optimization on the null
                model (100)
        NumIntervalsDeltaAlt: number of steps for delta optimization on the
                alt. model (0 - no optimization)
        searchDelta: Boolean indicator if delta is optimized during SNP testing
                (default False)

    Returns:
        CKroneckerLMM object
        P-values for all SNPs from likelihood ratio test
    """
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'
    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    # Asnps can be several designs
    if Asnps is None:
        Asnps = [SP.ones([1, P])]
    if type(Asnps) != list:
        Asnps = [Asnps]
    assert len(Asnps) > 0, "need at least one SNP design matrix"

    # one row per column design matrix
    pv = SP.zeros((len(Asnps), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c,
                                     K2c=K2c, covs=covs, Acovs=Acovs,
                                     covar_type=covar_type, rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'

    #2. run kroneckerLMM
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    # add covariates
    for ic in range(len(Acovs)):
        lmm.addCovariates(covs[ic], Acovs[ic])
    lmm.setPheno(phenos)

    # delta search on alt. model?
    if searchDelta:
        lmm.setNumIntervalsAlt(NumIntervalsDeltaAlt)
    else:
        lmm.setNumIntervalsAlt(0)
    lmm.setNumIntervals0(NumIntervalsDelta0)

    for iA in range(len(Asnps)):
        # add SNP design
        lmm.setSNPcoldesign(Asnps[iA])
        lmm.process()
        pv[iA, :] = lmm.getPv()[0]
    return lmm, pv
def forward_lmm(snps,
                pheno,
                K=None,
                covs=None,
                qvalues=False,
                threshold=5e-8,
                maxiter=2,
                test='lrt',
                **kw_args):
    """
    univariate fixed effects test with forward selection

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno:  [N x 1] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        covs:   [N x D] SP.array of D covariates for N individuals
        threshold: (float) P-value threshold for inclusion in forward selection
                (default 5e-8)
        maxiter: (int) maximum number of interaction scans. First scan is
                without inclusion, so maxiter-1 inclusions can be performed.
                (default 2)
        test:   'lrt' for likelihood ratio test (default) or 'f' for F-test

    Returns:
        lm:      limix LMM object
        iadded:  array of indices of SNPs included in order of inclusion
        pvadded: array of P-values obtained by the included SNPs in the
                 iteration before inclusion
        pvall:   [maxiter x S] SP.array of P-values for all iterations
    """
    if K is None:
        K = SP.eye(snps.shape[0])
    if covs is None:
        covs = SP.ones((snps.shape[0], 1))

    lm = simple_lmm(snps, pheno, K=K, covs=covs, test=test, **kw_args)
    pvall = SP.zeros((maxiter, snps.shape[1]))
    pv = lm.getPv()
    pvall[0:1, :] = pv
    imin = pv.argmin()
    niter = 1
    # start stuff
    iadded = []
    pvadded = []
    qvadded = []
    if qvalues:
        assert pv.shape[0] == 1, "This is untested with the fdr package. pv.shape[0]==1 failed"
        qvall = SP.zeros((maxiter, snps.shape[1]))
        qv = FDR.qvalues(pv)
        qvall[0:1, :] = qv
        score = qv.min()
    else:
        score = pv.min()
    while (score < threshold) and niter < maxiter:
        t0 = time.time()
        iadded.append(imin)
        pvadded.append(pv[0, imin])
        if qvalues:
            qvadded.append(qv[0, imin])
        covs = SP.concatenate((covs, snps[:, imin:(imin + 1)]), 1)
        lm.setCovs(covs)
        lm.process()
        pv = lm.getPv()
        pvall[niter:niter + 1, :] = pv
        imin = pv.argmin()
        if qvalues:
            qv = FDR.qvalues(pv)
            qvall[niter:niter + 1, :] = qv
            score = qv.min()
        else:
            score = pv.min()
        t1 = time.time()
        print(("finished GWAS testing in %.2f seconds" % (t1 - t0)))
        niter = niter + 1
    RV = {}
    RV['iadded'] = iadded
    RV['pvadded'] = pvadded
    RV['pvall'] = pvall
    if qvalues:
        RV['qvall'] = qvall
        RV['qvadded'] = qvadded
    return lm, RV
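# Usage sketch (synthetic data; assumes the module's simple_lmm backend and
# the SP = scipy import are available): two forward-selection iterations
# should pick up the causal SNP first.
SP.random.seed(0)
snps = 1.0 * (SP.random.rand(200, 500) < 0.3)
pheno = snps[:, [7]] + 0.5 * SP.random.randn(200, 1)
lm, RV = forward_lmm(snps, pheno, maxiter=2, threshold=1e-3)
print(RV['iadded'])    # expected to contain SNP index 7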
def simple_interaction_kronecker_deprecated(snps,
                                            phenos,
                                            covs=None,
                                            Acovs=None,
                                            Asnps1=None,
                                            Asnps0=None,
                                            K1r=None,
                                            K1c=None,
                                            K2r=None,
                                            K2c=None,
                                            covar_type='lowrank_diag',
                                            rank=1,
                                            searchDelta=False):
    """
    I-variate fixed effects interaction test for phenotype specific SNP
    effects.  (Runs multiple likelihood ratio tests and computes the P-values
    in python from the likelihood ratios)

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        phenos: [N x P] SP.array of P phenotypes for N individuals
        covs:   list of SP.arrays holding covariates. Each covs[i] has one
                corresponding Acovs[i]
        Acovs:  list of SP.arrays holding the phenotype design matrices for
                covariates. Each covs[i] has one corresponding Acovs[i].
        Asnps1: list of SP.arrays of I interaction variables to be tested for
                N individuals. Note that it is assumed that Asnps0 is already
                included. If not provided, the alternative model will be the
                independent model
        Asnps0: single SP.array of I0 interaction variables to be included in
                the background model when testing for interaction with Inters
        K1r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K1c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K2r:    [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        K2c:    [P x P] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        covar_type: type of covariance to use. Default 'freeform'. possible values are
                'freeform':     free form optimization,
                'fixed':        use a fixed matrix specified in covar_K0,
                'diag':         optimize a diagonal matrix,
                'lowrank':      optimize a low rank matrix. The rank of the
                                lowrank part is specified in the variable rank,
                'lowrank_id':   optimize a low rank matrix plus the weight of a
                                constant diagonal matrix. The rank of the
                                lowrank part is specified in the variable rank,
                'lowrank_diag': optimize a low rank matrix plus a free diagonal
                                matrix. The rank of the lowrank part is
                                specified in the variable rank,
                'block':        optimize the weight of a constant P x P block
                                matrix of ones,
                'block_id':     optimize the weight of a constant P x P block
                                matrix of ones plus the weight of a constant
                                diagonal matrix,
                'block_diag':   optimize the weight of a constant P x P block
                                matrix of ones plus a free diagonal matrix,
        rank:   rank of a possible lowrank component (default 1)
        searchDelta: Boolean indicator if delta is optimized during SNP testing
                (default False)

    Returns:
        pv:     P-values of the interaction test
        lrt0:   log likelihood ratio statistics of the null model
        pv0:    P-values of the null model
        lrt:    log likelihood ratio statistics of the interaction test
        lrtAlt: log likelihood ratio statistics of the alternative model
        pvAlt:  P-values of the alternative model
    """
    S = snps.shape[1]
    #0. checks
    N = phenos.shape[0]
    P = phenos.shape[1]

    if K1r is None:
        K1r = SP.dot(snps, snps.T)
    else:
        assert K1r.shape[0] == N, 'K1r: dimensions mismatch'
        assert K1r.shape[1] == N, 'K1r: dimensions mismatch'
    if K2r is None:
        K2r = SP.eye(N)
    else:
        assert K2r.shape[0] == N, 'K2r: dimensions mismatch'
        assert K2r.shape[1] == N, 'K2r: dimensions mismatch'

    covs, Acovs = updateKronCovs(covs, Acovs, N, P)

    # Asnps can be several designs
    if Asnps0 is None:
        Asnps0 = [SP.ones([1, P])]
    if Asnps1 is None:
        Asnps1 = [SP.eye(P)]
    if type(Asnps0) != list:
        Asnps0 = [Asnps0]
    if type(Asnps1) != list:
        Asnps1 = [Asnps1]
    assert (len(Asnps0) == 1) and (len(Asnps1) > 0), \
        "need at least one SNP design matrix for null and alt model"

    # one row per column design matrix
    pv = SP.zeros((len(Asnps1), snps.shape[1]))
    lrt = SP.zeros((len(Asnps1), snps.shape[1]))
    pvAlt = SP.zeros((len(Asnps1), snps.shape[1]))
    lrtAlt = SP.zeros((len(Asnps1), snps.shape[1]))

    #1. run GP model to infer suitable covariance structure
    if K1c is None or K2c is None:
        vc = estimateKronCovariances(phenos=phenos, K1r=K1r, K2r=K2r, K1c=K1c,
                                     K2c=K2c, covs=covs, Acovs=Acovs,
                                     covar_type=covar_type, rank=rank)
        K1c = vc.getEstTraitCovar(0)
        K2c = vc.getEstTraitCovar(1)
    else:
        assert K1c.shape[0] == P, 'K1c: dimensions mismatch'
        assert K1c.shape[1] == P, 'K1c: dimensions mismatch'
        assert K2c.shape[0] == P, 'K2c: dimensions mismatch'
        assert K2c.shape[1] == P, 'K2c: dimensions mismatch'

    #2. run kroneckerLMM for null model
    lmm = limix.CKroneckerLMM()
    lmm.setK1r(K1r)
    lmm.setK1c(K1c)
    lmm.setK2r(K2r)
    lmm.setK2c(K2c)
    lmm.setSNPs(snps)
    # add covariates
    for ic in range(len(Acovs)):
        lmm.addCovariates(covs[ic], Acovs[ic])
    lmm.setPheno(phenos)

    if searchDelta:
        lmm.setNumIntervalsAlt(100)
    else:
        lmm.setNumIntervalsAlt(0)
    lmm.setNumIntervals0(100)

    # add SNP design
    lmm.setSNPcoldesign(Asnps0[0])
    lmm.process()
    dof0 = Asnps0[0].shape[0]
    pv0 = lmm.getPv()
    lrt0 = ST.chi2.isf(pv0, dof0)
    for iA in range(len(Asnps1)):
        dof1 = Asnps1[iA].shape[0]
        dof = dof1 - dof0
        lmm.setSNPcoldesign(Asnps1[iA])
        lmm.process()
        pvAlt[iA, :] = lmm.getPv()[0]
        lrtAlt[iA, :] = ST.chi2.isf(pvAlt[iA, :], dof1)
        # Don't need the likelihood ratios, as the null model is the same
        # between the two models
        lrt[iA, :] = lrtAlt[iA, :] - lrt0[0]
        pv[iA, :] = ST.chi2.sf(lrt[iA, :], dof)
    return pv, lrt0, pv0, lrt, lrtAlt, pvAlt
def H_inv(self):
    return la.cho_solve((self.H_chol(), True),
                        sp.eye(self.rank_r * self.rank_c))
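# The cho_solve-against-identity trick used above, shown standalone: solving
# H X = I via a Cholesky factor yields H^{-1} without an explicit inv() call.
import scipy as sp
import scipy.linalg as la

R = sp.random.randn(4, 4)
H = sp.dot(R, R.T) + 4 * sp.eye(4)            # symmetric positive definite
chol_lower = la.cholesky(H, lower=True)
H_inv = la.cho_solve((chol_lower, True), sp.eye(4))
print sp.allclose(sp.dot(H, H_inv), sp.eye(4))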
def lognet(x, is_sparse, irs, pcs, y, weights, offset, parm, nobs, nvars, jd,
           vp, cl, ne, nx, nlam, flmin, ulam, thresh, isd, intr, maxit, kopt,
           family):
    # load shared fortran library
    glmlib = loadGlmLib()

    noo = y.shape[0]
    if len(y.shape) > 1:
        nc = y.shape[1]
    else:
        nc = 1

    if noo != nobs:
        raise ValueError('x and y have different number of rows in call to glmnet')

    if nc == 1:
        classes, sy = scipy.unique(y, return_inverse=True)
        nc = len(classes)
        indexes = scipy.eye(nc, nc)
        y = indexes[sy, :]
    else:
        classes = scipy.arange(nc) + 1  # 1:nc

    if family == 'binomial':
        if nc > 2:
            raise ValueError('More than two classes in y. use multinomial family instead')
        else:
            nc = 1
            y = y[:, [1, 0]]

    if len(weights) != 0:
        t = weights > 0
        if not scipy.all(t):
            t = scipy.reshape(t, (len(y), ))
            y = y[t, :]
            x = x[t, :]
            weights = weights[t]
            nobs = scipy.sum(t)
    else:
        t = scipy.empty([0], dtype=scipy.integer)

    if len(y.shape) == 1:
        mv = len(y)
        ny = 1
    else:
        mv, ny = y.shape

    y = y * scipy.tile(weights, (1, ny))

    if len(offset) == 0:
        offset = y * 0
        is_offset = False
    else:
        if len(t) != 0:
            offset = offset[t, :]
        do = offset.shape
        if do[0] != nobs:
            raise ValueError('offset should have the same number of values as observations in binomial/multinomial call to glmnet')
        if nc == 1:
            if do[1] == 1:
                # column_stack takes a single tuple argument
                offset = scipy.column_stack((offset, -offset))
            if do[1] > 2:
                raise ValueError('offset should have 1 or 2 columns in binomial call to glmnet')
        if (family == 'multinomial') and (do[1] != nc):
            raise ValueError('offset should have same shape as y in multinomial call to glmnet')
        is_offset = True

    # now convert types and allocate memory before calling the glmnet
    # fortran library
    ######################################
    # --------- PROCESS INPUTS -----------
    ######################################
    # force inputs into fortran order and scipy float64
    copyFlag = False
    x = x.astype(dtype=scipy.float64, order='F', copy=copyFlag)
    irs = irs.astype(dtype=scipy.int32, order='F', copy=copyFlag)
    pcs = pcs.astype(dtype=scipy.int32, order='F', copy=copyFlag)
    y = y.astype(dtype=scipy.float64, order='F', copy=copyFlag)
    weights = weights.astype(dtype=scipy.float64, order='F', copy=copyFlag)
    offset = offset.astype(dtype=scipy.float64, order='F', copy=copyFlag)
    jd = jd.astype(dtype=scipy.int32, order='F', copy=copyFlag)
    vp = vp.astype(dtype=scipy.float64, order='F', copy=copyFlag)
    cl = cl.astype(dtype=scipy.float64, order='F', copy=copyFlag)
    ulam = ulam.astype(dtype=scipy.float64, order='F', copy=copyFlag)

    ######################################
    # --------- ALLOCATE OUTPUTS ---------
    ######################################
    # lmu
    lmu = -1
    lmu_r = ctypes.c_int(lmu)
    # a0, ca
    if nc == 1:
        a0 = scipy.zeros([nlam], dtype=scipy.float64)
        ca = scipy.zeros([nx, nlam], dtype=scipy.float64)
    else:
        a0 = scipy.zeros([nc, nlam], dtype=scipy.float64)
        ca = scipy.zeros([nx, nc, nlam], dtype=scipy.float64)
    # a0
    a0 = a0.astype(dtype=scipy.float64, order='F', copy=False)
    a0_r = a0.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ca
    ca = ca.astype(dtype=scipy.float64, order='F', copy=False)
    ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # ia
    ia = -1 * scipy.ones([nx], dtype=scipy.int32)
    ia = ia.astype(dtype=scipy.int32, order='F', copy=False)
    ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # nin
    nin = -1 * scipy.ones([nlam], dtype=scipy.int32)
    nin = nin.astype(dtype=scipy.int32, order='F', copy=False)
    nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
    # dev
    dev = -1 * scipy.ones([nlam], dtype=scipy.float64)
    dev = dev.astype(dtype=scipy.float64, order='F', copy=False)
    dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # alm
    alm = -1 * scipy.ones([nlam], dtype=scipy.float64)
    alm = alm.astype(dtype=scipy.float64, order='F', copy=False)
    alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
    # nlp
    nlp = -1
    nlp_r = ctypes.c_int(nlp)
    # jerr
    jerr = -1
    jerr_r = ctypes.c_int(jerr)
    # dev0
    dev0 = -1
    dev0_r = ctypes.c_double(dev0)

    # ###################################
    #   main glmnet fortran caller
    # ###################################
    if is_sparse:
        # sparse lognet
        glmlib.splognet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)),
            ctypes.byref(ctypes.c_int(nc)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            pcs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            irs.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)),
            ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)),
            ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)),
            ctypes.byref(ctypes.c_int(kopt)),
            ctypes.byref(lmu_r),
            a0_r, ca_r, ia_r, nin_r,
            ctypes.byref(dev0_r),
            dev_r, alm_r,
            ctypes.byref(nlp_r),
            ctypes.byref(jerr_r))
    else:
        # call fortran lognet routine
        glmlib.lognet_(
            ctypes.byref(ctypes.c_double(parm)),
            ctypes.byref(ctypes.c_int(nobs)),
            ctypes.byref(ctypes.c_int(nvars)),
            ctypes.byref(ctypes.c_int(nc)),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            y.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
            vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_int(ne)),
            ctypes.byref(ctypes.c_int(nx)),
            ctypes.byref(ctypes.c_int(nlam)),
            ctypes.byref(ctypes.c_double(flmin)),
            ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(ctypes.c_double(thresh)),
            ctypes.byref(ctypes.c_int(isd)),
            ctypes.byref(ctypes.c_int(intr)),
            ctypes.byref(ctypes.c_int(maxit)),
            ctypes.byref(ctypes.c_int(kopt)),
            ctypes.byref(lmu_r),
            a0_r, ca_r, ia_r, nin_r,
            ctypes.byref(dev0_r),
            dev_r, alm_r,
            ctypes.byref(nlp_r),
            ctypes.byref(jerr_r))

    # ###################################
    #   post process results
    # ###################################

    # check for error
    if jerr_r.value > 0:
        raise ValueError('Fatal glmnet error in library call: error code = %d' % jerr_r.value)
    elif jerr_r.value < 0:
        print('Warning: Non-fatal error in glmnet library call: error code = %d' % jerr_r.value)
        print('Check results for accuracy. Partial or no results returned.')

    # clip output to correct sizes
    lmu = lmu_r.value
    if nc == 1:
        a0 = a0[0:lmu]
        ca = ca[0:nx, 0:lmu]
    else:
        a0 = a0[0:nc, 0:lmu]
        ca = ca[0:nx, 0:nc, 0:lmu]
    ia = ia[0:nx]
    nin = nin[0:lmu]
    dev = dev[0:lmu]
    alm = alm[0:lmu]

    # ninmax
    ninmax = max(nin)
    # fix first value of alm (from inf to correct value)
    if ulam[0] == 0.0:
        t1 = scipy.log(alm[1])
        t2 = scipy.log(alm[2])
        alm[0] = scipy.exp(2 * t1 - t2)

    # create return fit dictionary
    if family == 'multinomial':
        a0 = a0 - scipy.tile(scipy.mean(a0), (nc, 1))
        dfmat = a0.copy()
        dd = scipy.array([nvars, lmu], dtype=scipy.integer)
        beta_list = list()
        if ninmax > 0:
            # TODO: is the reshape here done right?
            ca = scipy.reshape(ca, (nx, nc, lmu))
            ca = ca[0:ninmax, :, :]
            ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
            oja = scipy.argsort(ja)
            ja1 = ja[oja]
            df = scipy.any(scipy.absolute(ca) > 0, axis=1)
            # per-lambda df; a bare sum() would collapse to a scalar
            df = scipy.sum(df, axis=0)
            df = scipy.reshape(df, (1, df.size))
            for k in range(0, nc):
                ca1 = scipy.reshape(ca[:, k, :], (ninmax, lmu))
                cak = ca1[oja, :]
                dfmat[k, :] = scipy.sum(scipy.absolute(cak) > 0, axis=0)
                beta = scipy.zeros([nvars, lmu], dtype=scipy.float64)
                beta[ja1, :] = cak
                beta_list.append(beta)
        else:
            for k in range(0, nc):
                dfmat[k, :] = scipy.zeros([1, lmu], dtype=scipy.float64)
                beta_list.append(scipy.zeros([nvars, lmu], dtype=scipy.float64))
            df = scipy.zeros([1, lmu], dtype=scipy.float64)

        if kopt == 2:
            grouped = True
        else:
            grouped = False

        fit = dict()
        fit['a0'] = a0
        fit['label'] = classes
        fit['beta'] = beta_list
        fit['dev'] = dev
        fit['nulldev'] = dev0_r.value
        fit['dfmat'] = dfmat
        fit['df'] = df
        fit['lambdau'] = alm
        fit['npasses'] = nlp_r.value
        fit['jerr'] = jerr_r.value
        fit['dim'] = dd
        fit['grouped'] = grouped
        fit['offset'] = is_offset
        fit['class'] = 'multnet'
    else:
        dd = scipy.array([nvars, lmu], dtype=scipy.integer)
        if ninmax > 0:
            ca = ca[0:ninmax, :]
            df = scipy.sum(scipy.absolute(ca) > 0, axis=0)
            ja = ia[0:ninmax] - 1  # ia is 1-indexed in fortran
            oja = scipy.argsort(ja)
            ja1 = ja[oja]
            beta = scipy.zeros([nvars, lmu], dtype=scipy.float64)
            beta[ja1, :] = ca[oja, :]
        else:
            beta = scipy.zeros([nvars, lmu], dtype=scipy.float64)
            df = scipy.zeros([1, lmu], dtype=scipy.float64)

        fit = dict()
        fit['a0'] = a0
        fit['label'] = classes
        fit['beta'] = beta
        fit['dev'] = dev
        fit['nulldev'] = dev0_r.value
        fit['df'] = df
        fit['lambdau'] = alm
        fit['npasses'] = nlp_r.value
        fit['jerr'] = jerr_r.value
        fit['dim'] = dd
        fit['offset'] = is_offset
        fit['class'] = 'lognet'

    # ###################################
    #   return to caller
    # ###################################
    return fit
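# Hedged sketch of the ctypes calling convention used above: Fortran
# routines take every argument by reference, so scalars are wrapped in
# ctypes.byref(...) and numpy arrays are passed as raw data pointers.
# The BLAS routine dscal_ and the library lookup below are illustrative
# assumptions; any Fortran symbol follows the same pattern.
import ctypes
import ctypes.util
import numpy as np

blas = ctypes.CDLL(ctypes.util.find_library('blas'))
n = 5
x = np.arange(n, dtype=np.float64)           # must be contiguous
alpha = ctypes.c_double(2.0)
inc = ctypes.c_int(1)
# dscal: x <- alpha * x, in place
blas.dscal_(ctypes.byref(ctypes.c_int(n)), ctypes.byref(alpha),
            x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
            ctypes.byref(inc))
print(x)  # [0. 2. 4. 6. 8.]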
def _revertToSafety(self):
    """ When encountering a bad matrix, this is how we revert to a safe one. """
    self.factorSigma = eye(self.numParameters)
    self.x = self.bestEvaluable
    self.allFactorSigmas[-1][:] = self.factorSigma
    self.sigma = dot(self.factorSigma.T, self.factorSigma)
def toarray(self): return sp.eye(self.shape[0], dtype=self.dtype)
def get_ham(J, h):
    ham = -J * (sp.kron(Sx, Sx) + h * sp.kron(Sz, sp.eye(2))).reshape(2, 2, 2, 2)
    return ham
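# Hedged usage sketch for get_ham: Sx and Sz are assumed to be the Pauli
# matrices (they are defined elsewhere in the module this snippet came
# from). The result is the nearest-neighbour term of a transverse-field
# Ising Hamiltonian, reshaped to rank 4 as MPS/TEBD code expects.
import numpy as np

Sx = np.array([[0.0, 1.0], [1.0, 0.0]])
Sz = np.array([[1.0, 0.0], [0.0, -1.0]])
ham = get_ham(1.0, 0.5)
print(ham.shape)  # (2, 2, 2, 2)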
def test_trivial():
    n = 5
    X = ones((n, 1))
    A = eye(n)
    compare_solutions(A, None, n)
def get_LDpred_ld_tables(snps, ld_radius=100, ld_window_size=0, h2=None,
                         n_training=None, gm=None, gm_ld_radius=None):
    """
    Calculates LD tables, and the LD score in one go...
    """
    ld_dict = {}
    m, n = snps.shape
    ld_scores = sp.ones(m)
    ret_dict = {}
    if gm_ld_radius is None:
        for snp_i, snp in enumerate(snps):
            # Calculate D
            start_i = max(0, snp_i - ld_radius)
            stop_i = min(m, snp_i + ld_radius + 1)
            X = snps[start_i:stop_i]
            D_i = sp.dot(snp, X.T) / n
            r2s = D_i ** 2
            ld_dict[snp_i] = D_i
            lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
            ld_scores[snp_i] = lds_i
    else:
        assert gm is not None, 'Genetic map is missing.'
        window_sizes = []
        ld_boundaries = []
        for snp_i, snp in enumerate(snps):
            curr_cm = gm[snp_i]

            # Now find lower boundary
            start_i = snp_i
            min_cm = gm[snp_i]
            while start_i > 0 and min_cm > curr_cm - gm_ld_radius:
                start_i = start_i - 1
                min_cm = gm[start_i]

            # Now find the upper boundary (must stop at the end of the
            # map, not at 0; the original condition looked like a typo)
            stop_i = snp_i
            max_cm = gm[snp_i]
            while stop_i < m - 1 and max_cm < curr_cm + gm_ld_radius:
                stop_i = stop_i + 1
                max_cm = gm[stop_i]

            ld_boundaries.append([start_i, stop_i])
            curr_ws = stop_i - start_i
            window_sizes.append(curr_ws)
            assert curr_ws > 0, 'Some issues with the genetic map'

            X = snps[start_i:stop_i]
            D_i = sp.dot(snp, X.T) / n
            r2s = D_i ** 2
            ld_dict[snp_i] = D_i
            lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
            ld_scores[snp_i] = lds_i

        avg_window_size = sp.mean(window_sizes)
        print('Average # of SNPs in LD window was %0.2f' % avg_window_size)
        if ld_window_size == 0:
            ld_window_size = int(avg_window_size * 2)  # range() below needs an int step
        ret_dict['ld_boundaries'] = ld_boundaries
    ret_dict['ld_dict'] = ld_dict
    ret_dict['ld_scores'] = ld_scores

    if ld_window_size > 0:
        ref_ld_matrices = []
        inf_shrink_matrices = []
        for wi in range(0, m, ld_window_size):
            start_i = wi
            stop_i = min(m, wi + ld_window_size)
            curr_window_size = stop_i - start_i
            X = snps[start_i:stop_i]
            D = sp.dot(X, X.T) / n
            ref_ld_matrices.append(D)
            if h2 is not None and n_training is not None:
                A = (m / h2) * sp.eye(curr_window_size) + n_training * D
                A_inv = linalg.pinv(A)
                inf_shrink_matrices.append(A_inv)
        ret_dict['ref_ld_matrices'] = ref_ld_matrices
        if h2 is not None and n_training is not None:
            ret_dict['inf_shrink_matrices'] = inf_shrink_matrices
    return ret_dict
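# A hedged sketch of what the 'inf_shrink_matrices' above are used for:
# under the LDpred-inf model, joint effect estimates are obtained by
# shrinking the marginal (single-SNP) effects through A^{-1} with
# A = (M / h2) * I + N * D. All numbers below are illustrative.
import numpy as np
from scipy import linalg

M, h2, N = 100, 0.5, 1000                     # genome-wide SNPs, heritability, sample size
D = np.eye(5)                                 # stand-in LD matrix for a 5-SNP window
beta_marg = np.array([0.02, -0.01, 0.03, 0.0, 0.01])
A_inv = linalg.pinv((M / h2) * np.eye(5) + N * D)
beta_inf = N * A_inv.dot(beta_marg)           # shrunken joint estimates
print(beta_inf)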
def varianceDecomposition(self, K=None, tech_noise=None, idx=None, i0=None,
                          i1=None, max_iter=10, verbose=False):
    """
    Args:
        K:        list of random effects to be considered in the analysis
        idx:      indices of the genes to be considered in the analysis
        i0:       gene index from which the analysis starts
        i1:       gene index at which the analysis stops
        max_iter: maximum number of random restarts
        verbose:  if True, print progress
    """
    if tech_noise is not None:
        self.set_tech_noise(tech_noise)
    assert self.tech_noise is not None, 'scLVM:: specify technical noise'
    assert K is not None, 'scLVM:: specify K'
    if not isinstance(K, list):
        K = [K]
    for k in K:
        assert k.shape[0] == self.N, 'scLVM:: K dimension mismatch'
        assert k.shape[1] == self.N, 'scLVM:: K dimension mismatch'

    if idx is None:
        if i0 is None or i1 is None:
            i0 = 0
            i1 = self.G
        idx = SP.arange(i0, i1)
    elif not isinstance(idx, SP.ndarray):
        idx = SP.array([idx])

    _G = len(idx)
    var = SP.zeros((_G, len(K) + 2))
    _idx = SP.zeros(_G)
    geneID = SP.zeros(_G, dtype=str)
    conv = SP.zeros(_G) == 1
    Ystar = [SP.zeros((self.N, _G)) for i in range(len(K))]
    count = 0
    Ystd = self.Y - self.Y.mean(0)  # delta optimization might be more efficient
    Ystd /= self.Y.std(0)
    tech_noise = self.tech_noise / SP.array(self.Y.std(0)) ** 2

    for ids in idx:
        if verbose:
            print('.. fitting gene %d' % ids)
        # extract a single gene
        y = Ystd[:, ids:ids + 1]
        # build and fit variance decomposition model
        vc = VAR.VarianceDecomposition(y)
        vc.addFixedEffect()
        for k in K:
            vc.addRandomEffect(k)
        vc.addRandomEffect(SP.eye(self.N))
        vc.addRandomEffect(SP.eye(self.N))
        vc.vd.getTerm(len(K) + 1).getKcf().setParamMask(SP.zeros(1))
        for iter_i in range(max_iter):
            scales0 = y.std() * SP.randn(len(K) + 2)
            scales0[len(K) + 1] = SP.sqrt(tech_noise[ids])
            _conv = vc.optimize(scales0=scales0, n_times=2)
            if _conv:
                break
        conv[count] = _conv
        if self.geneID is not None:
            geneID[count] = self.geneID[ids]
        if not _conv:
            var[count, -2] = SP.maximum(0, y.var() - tech_noise[ids])
            var[count, -1] = tech_noise[ids]
            count += 1
            continue
        _var = vc.getVarianceComps()[0, :]
        KiY = vc.gp.agetKEffInvYCache().ravel()
        for ki in range(len(K)):
            Ystar[ki][:, count] = _var[ki] * SP.dot(K[ki], KiY)
        var[count, :] = _var
        count += 1

    # column header
    col_header = ['hidden_%d' % i for i in range(len(K))]
    col_header.append('biol_noise')
    col_header.append('tech_noise')
    col_header = SP.array(col_header)

    # annotate columns and rows of var and Ystar
    # (gate on self.geneID: the local geneID array is never None)
    var_info = {'gene_idx': idx, 'col_header': col_header, 'conv': conv}
    if self.geneID is not None:
        var_info['geneID'] = SP.array(geneID)
    Ystar_info = {'gene_idx': idx, 'conv': conv}
    if self.geneID is not None:
        Ystar_info['geneID'] = SP.array(geneID)

    # cache results
    self.var = var
    self.Ystar = Ystar
    self.var_info = var_info
    self.Ystar_info = Ystar_info
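# Why tech_noise is divided by Y.std(0)**2 above (a small illustrative
# check with plain numpy, not scLVM API): after standardizing each gene
# to unit variance, any variance component measured on the raw scale has
# to be rescaled by the same factor, i.e. divided by the gene's raw
# variance.
import numpy as np

rng = np.random.RandomState(0)
Y = 3.0 + 2.0 * rng.randn(1000, 1)         # one gene, raw variance ~4
raw_tech_noise = 1.0                       # variance on the raw scale
Ystd = (Y - Y.mean(0)) / Y.std(0)          # unit total variance
scaled_tech_noise = raw_tech_noise / Y.std(0) ** 2
print(Ystd.var(0), scaled_tech_noise)      # ~1.0 and ~0.25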
def step(self, niter):
    """ xNES """
    f = self.f
    mu, sigma, bmat = self.mu, self.sigma, self.bmat
    eta_mu, eta_sigma, eta_bmat = self.eta_mu, self.eta_sigma, self.eta_bmat
    npop = self.npop
    dim = self.dim
    sigma_old = self.sigma_old

    eyemat = eye(dim)

    with joblib.Parallel(n_jobs=self.n_jobs) as parallel:
        for i in range(niter):
            s_try = randn(npop, dim)
            z_try = mu + sigma * dot(s_try, bmat)  # broadcast

            f_try = parallel(joblib.delayed(f)(z) for z in z_try)
            f_try = asarray(f_try)

            # save if best
            fitness = mean(f_try)
            if fitness - 1e-8 > self.fitness_best:
                self.fitness_best = fitness
                self.mu_best = mu.copy()
                self.counter = 0
            else:
                self.counter += 1
            if self.counter > self.patience:
                self.done = True
                return

            isort = argsort(f_try)
            f_try = f_try[isort]
            s_try = s_try[isort]
            z_try = z_try[isort]

            u_try = self.utilities if self.use_fshape else f_try

            if self.use_adasam and sigma_old is not None:  # sigma_old must be available
                eta_sigma = self.adasam(eta_sigma, mu, sigma, bmat, sigma_old, z_try)

            dj_delta = dot(u_try, s_try)
            dj_mmat = dot(s_try.T, s_try * u_try.reshape(npop, 1)) - sum(u_try) * eyemat
            dj_sigma = trace(dj_mmat) * (1.0 / dim)
            dj_bmat = dj_mmat - dj_sigma * eyemat

            sigma_old = sigma

            # update
            mu += eta_mu * sigma * dot(bmat, dj_delta)
            sigma *= exp(0.5 * eta_sigma * dj_sigma)
            bmat = dot(bmat, expm(0.5 * eta_bmat * dj_bmat))

            # logging
            self.history['fitness'].append(fitness)
            self.history['sigma'].append(sigma)
            self.history['eta_sigma'].append(eta_sigma)

    # keep last results
    self.mu, self.sigma, self.bmat = mu, sigma, bmat
    self.eta_sigma = eta_sigma
    self.sigma_old = sigma_old
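# Hedged sketch of the multiplicative covariance update used above: xNES
# moves B along the matrix exponential of the (traceless) natural
# gradient, which keeps B invertible for any step size. The matrices and
# step size below are illustrative; scipy.linalg.expm is the same routine
# the snippet relies on.
import numpy as np
from scipy.linalg import expm

B = np.eye(2)
G = np.array([[0.0, 0.1], [0.1, 0.0]])   # traceless gradient direction
eta = 0.5
B_new = B.dot(expm(0.5 * eta * G))
# det(expm(M)) = exp(trace(M)) = 1 here, so invertibility is preserved
print(np.linalg.det(B_new) > 0)          # True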
def get_hamiltonian(h):
    ham = -(sp.kron(Sx, Sx) + h * sp.kron(Sz, sp.eye(2))).reshape(2, 2, 2, 2)
    return ham
def reset(self):
    self.trainx = zeros((0, self.indim), float)
    self.trainy = zeros((0), float)
    self.noise = zeros((0), float)
    self.pred_mean = zeros(len(self.testx))
    self.pred_cov = eye(len(self.testx))
def run_emmax(hdf5_filename='/home/bv25/data/Ls154/Ls154_12.hdf5',
              out_file='/home/bv25/data/Ls154/Ls154_results.hdf5',
              min_maf=0.1, recalculate_kinship=True, chunk_size=1000):
    """
    Apply the EMMAX algorithm to hdf5 formatted genotype/phenotype data
    """
    ih5f = h5py.File(hdf5_filename)
    gg = ih5f['genot_data']
    ig = ih5f['indiv_data']
    n_indivs = len(ig['indiv_ids'][...])

    if recalculate_kinship:
        print 'Calculating kinship.'
        k_mat = sp.zeros((n_indivs, n_indivs), dtype='single')
        chromosomes = gg.keys()
        n_snps = 0
        for chrom in chromosomes:
            print 'Working on Chromosome %s' % chrom
            cg = gg[chrom]
            freqs = cg['freqs'][...]
            mafs = sp.minimum(freqs, 1 - freqs)
            maf_filter = mafs > min_maf
            print 'Filtered out %d SNPs with MAF<%0.2f.' % (
                len(maf_filter) - sum(maf_filter), min_maf)
            snps = cg['raw_snps'][...]
            snps = snps[maf_filter]
            num_snps = len(snps)

            for chunk_i, i in enumerate(range(0, num_snps, chunk_size)):
                end_i = min(i + chunk_size, num_snps)
                x = snps[i:end_i]
                x = x.T
                x = (x - sp.mean(x, 0)) / sp.std(x, 0)
                x = x.T
                n_snps += len(x)
                k_mat += sp.dot(x.T, x)
                del x
                sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' % (
                    100.0 * (min(1, ((chunk_i + 1.0) * chunk_size) / num_snps))))
                sys.stdout.flush()
            sys.stdout.write('\b\b\b\b\b\b\b100.00%\n')

        k_mat = k_mat / float(n_snps)
        c = sp.sum((sp.eye(len(k_mat)) - (1.0 / len(k_mat)) * sp.ones(k_mat.shape)) * sp.array(k_mat))
        scalar = (len(k_mat) - 1) / c
        print 'Kinship scaled by: %0.4f' % scalar
        k = scalar * k_mat
    else:
        assert 'kinship' in ih5f.keys(), 'Kinship is missing. Please calculate that first!'
        k = ih5f['kinship']

    # Get the phenotypes
    phenotypes = ig['phenotypes'][...]

    # Initialize the mixed model
    lmm = lm.LinearMixedModel(phenotypes)
    lmm.add_random_effect(k)

    # Calculate pseudo-heritability, etc.
    print 'Calculating the eigenvalues of K'
    s0 = time.time()
    eig_L = lmm._get_eigen_L_()
    print 'Done.'
    print 'Took %0.2f seconds' % (time.time() - s0)

    print "Calculating the eigenvalues of S(K+I)S where S = I-X(X'X)^-1X'"
    s0 = time.time()
    eig_R = lmm._get_eigen_R_(X=lmm.X)
    print 'Done'
    print 'Took %0.2f seconds' % (time.time() - s0)

    print 'Getting variance estimates'
    s0 = time.time()
    res = lmm.get_estimates(eig_L, method='REML', eig_R=eig_R)  # Get the variance estimates..
    print 'Done.'
    print 'Took %0.2f seconds' % (time.time() - s0)
    print 'pseudo_heritability:', res['pseudo_heritability']

    # Initialize results file
    oh5f = h5py.File(out_file)

    # Store phenotype_data
    oh5f.create_dataset('pseudo_heritability', data=sp.array(res['pseudo_heritability']))
    oh5f.create_dataset('ve', data=sp.array(res['ve']))
    oh5f.create_dataset('vg', data=sp.array(res['vg']))
    oh5f.create_dataset('max_ll', data=sp.array(res['max_ll']))
    oh5f.create_dataset('num_snps', data=ih5f['num_snps'])

    # Construct results data containers
    chrom_res_group = oh5f.create_group('chrom_results')

    for chrom in gg.keys():
        crg = chrom_res_group.create_group(chrom)
        # Get the SNPs
        print 'Working on Chromosome: %s' % chrom
        freqs = gg[chrom]['freqs'][...]
        mafs = sp.minimum(freqs, 1 - freqs)
        maf_filter = mafs > min_maf
        print 'Filtered out %d SNPs with MAF<%0.2f.' % (
            len(maf_filter) - sum(maf_filter), min_maf)
        snps = gg[chrom]['raw_snps'][...]
        snps = snps[maf_filter]
        positions = gg[chrom]['positions'][...]
        positions = positions[maf_filter]

        # Now run EMMAX
        print "Running EMMAX"
        s1 = time.time()
        r = lmm._emmax_f_test_(snps, res['H_sqrt_inv'], with_betas=False,
                               emma_num=0, eig_L=eig_L)
        secs = time.time() - s1
        if secs > 60:
            mins = int(secs) / 60
            secs = secs % 60
            print 'Took %d mins and %0.1f seconds.' % (mins, secs)
        else:
            print 'Took %0.1f seconds.' % (secs)
        crg.create_dataset('ps', data=r['ps'])
        crg.create_dataset('positions', data=positions)
        oh5f.flush()

    ih5f.close()
    oh5f.close()
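# A hedged, standalone sketch of the kinship computation run in chunks
# above: each SNP row is standardized across individuals, K = X^T X is
# averaged over SNPs, then rescaled so the centred trace matches n - 1.
# The random genotypes here are illustrative only.
import numpy as np

rng = np.random.RandomState(1)
snps = rng.binomial(2, 0.3, size=(50, 20)).astype(float)  # 50 SNPs x 20 individuals
x = (snps.T - snps.T.mean(0)) / snps.T.std(0)             # standardize per SNP
K = np.dot(x, x.T) / snps.shape[0]                        # individuals x individuals
n = K.shape[0]
c = np.sum((np.eye(n) - np.ones((n, n)) / n) * K)
K_scaled = ((n - 1) / c) * K
print(K_scaled.shape)  # (20, 20)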
def _UU(self):
    RV = sp.dot(self.DhW().T, self.DhW())
    _S, _U = la.eigh(RV)
    if _S.min() < 0:
        RV += (abs(_S.min()) + 1e-9) * sp.eye(RV.shape[0])
    return RV
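# Minimal sketch of the same positive-definiteness repair on a plain
# numpy array: shift the whole spectrum so the smallest eigenvalue
# becomes ~1e-9. The matrix M is illustrative.
import numpy as np

M = np.array([[1.0, 2.0], [2.0, 1.0]])   # eigenvalues -1 and 3
w = np.linalg.eigvalsh(M)
if w.min() < 0:
    M = M + (abs(w.min()) + 1e-9) * np.eye(M.shape[0])
print(np.linalg.eigvalsh(M))  # approx [1e-09, 4.0]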
def run_emmax_perm(hdf5_filename='/home/bv25/data/Ls154/Ls154_12.hdf5',
                   out_file='/home/bv25/data/Ls154/Ls154_results_perm.hdf5',
                   min_maf=0.1, recalculate_kinship=True, chunk_size=1000,
                   num_perm=500):
    """
    Apply the EMMAX algorithm to hdf5 formatted genotype/phenotype data
    """
    ih5f = h5py.File(hdf5_filename)
    gg = ih5f['genot_data']
    ig = ih5f['indiv_data']
    n_indivs = len(ig['indiv_ids'][...])

    print 'Calculating kinship.'
    k_mat = sp.zeros((n_indivs, n_indivs), dtype='single')
    chromosomes = gg.keys()
    # chromosomes = chromosomes[-1:]
    n_snps = 0
    for chrom in chromosomes:
        print 'Working on Chromosome %s' % chrom
        cg = gg[chrom]
        freqs = cg['freqs'][...]
        mafs = sp.minimum(freqs, 1 - freqs)
        maf_filter = mafs > min_maf
        print 'Filtered out %d SNPs with MAF<%0.2f.' % (
            len(maf_filter) - sum(maf_filter), min_maf)
        snps = cg['raw_snps'][...]
        snps = snps[maf_filter]
        num_snps = len(snps)

        for chunk_i, i in enumerate(range(0, num_snps, chunk_size)):
            end_i = min(i + chunk_size, num_snps)
            x = snps[i:end_i]
            x = x.T
            x = (x - sp.mean(x, 0)) / sp.std(x, 0)
            x = x.T
            n_snps += len(x)
            k_mat += sp.dot(x.T, x)
            del x
            sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' % (
                100.0 * (min(1, ((chunk_i + 1.0) * chunk_size) / num_snps))))
            sys.stdout.flush()
        sys.stdout.write('\b\b\b\b\b\b\b100.00%\n')

    k_mat = k_mat / float(n_snps)
    c = sp.sum((sp.eye(len(k_mat)) - (1.0 / len(k_mat)) * sp.ones(k_mat.shape)) * sp.array(k_mat))
    scalar = (len(k_mat) - 1) / c
    print 'Kinship scaled by: %0.4f' % scalar
    k = scalar * k_mat

    # Store the kinship
    # Initialize results file
    oh5f = h5py.File(out_file)
    oh5f.create_dataset('kinship', data=k)
    oh5f.flush()

    chromosomes = gg.keys()
    num_tot_snps = 0
    num_12_chr_snps = 0
    for chrom in chromosomes:
        cg = gg[chrom]
        freqs = cg['freqs'][...]
        mafs = sp.minimum(freqs, 1 - freqs)
        maf_filter = mafs > min_maf
        n_snps = sum(maf_filter)
        num_tot_snps += n_snps
        if chrom != chromosomes[-1]:
            num_12_chr_snps += n_snps

    # Get the phenotypes
    phenotypes = ig['phenotypes'][...]

    # Initialize the mixed model
    lmm = lm.LinearMixedModel(phenotypes)
    lmm.add_random_effect(k)

    # Calculate pseudo-heritability, etc.
    print 'Calculating the eigenvalues of K'
    s0 = time.time()
    eig_L = lmm._get_eigen_L_()
    print 'Done.'
    print 'Took %0.2f seconds' % (time.time() - s0)

    print "Calculating the eigenvalues of S(K+I)S where S = I-X(X'X)^-1X'"
    s0 = time.time()
    eig_R = lmm._get_eigen_R_(X=lmm.X)
    print 'Done'
    print 'Took %0.2f seconds' % (time.time() - s0)

    print 'Getting variance estimates'
    s0 = time.time()
    res = lmm.get_estimates(eig_L, method='REML', eig_R=eig_R)  # Get the variance estimates..
    print 'Done.'
    print 'Took %0.2f seconds' % (time.time() - s0)
    print 'pseudo_heritability:', res['pseudo_heritability']

    # Store phenotype_data
    oh5f.create_dataset('pseudo_heritability', data=sp.array(res['pseudo_heritability']))
    oh5f.create_dataset('ve', data=sp.array(res['ve']))
    oh5f.create_dataset('vg', data=sp.array(res['vg']))
    oh5f.create_dataset('max_ll', data=sp.array(res['max_ll']))
    # total MAF-filtered SNP count; the bare loop counter n_snps only
    # held the last chromosome's count
    oh5f.create_dataset('num_snps', data=sp.array(num_tot_snps))

    # Construct results data containers
    chrom_res_group = oh5f.create_group('chrom_results')
    # all_snps = sp.empty((n_snps, n_indivs))
    chr12_snps = sp.empty((num_12_chr_snps, n_indivs))
    i = 0
    for chrom in gg.keys():
        crg = chrom_res_group.create_group(chrom)
        # Get the SNPs
        print 'Working on Chromosome: %s' % chrom
        freqs = gg[chrom]['freqs'][...]
        mafs = sp.minimum(freqs, 1 - freqs)
        maf_filter = mafs > min_maf
        print 'Filtered out %d SNPs with MAF<%0.2f.' % (
            len(maf_filter) - sum(maf_filter), min_maf)
        snps = gg[chrom]['raw_snps'][...]
        snps = snps[maf_filter]
        positions = gg[chrom]['positions'][...]
        positions = positions[maf_filter]
        n = len(snps)
        # all_snps[i:i + n] = snps
        if chrom != chromosomes[-1]:
            chr12_snps[i:i + n] = snps

        # Now run EMMAX
        print "Running EMMAX"
        s1 = time.time()
        r = lmm._emmax_f_test_(snps, res['H_sqrt_inv'], with_betas=False,
                               emma_num=0, eig_L=eig_L)
        secs = time.time() - s1
        if secs > 60:
            mins = int(secs) / 60
            secs = secs % 60
            print 'Took %d mins and %0.1f seconds.' % (mins, secs)
        else:
            print 'Took %0.1f seconds.' % (secs)
        crg.create_dataset('ps', data=r['ps'])
        crg.create_dataset('positions', data=positions)
        oh5f.flush()
        i += n

    print 'Starting permutation test for detecting the genome-wide significance threshold'
    s1 = time.time()
    perm_res = lmm._emmax_permutations_(chr12_snps, k, res['H_sqrt_inv'],
                                        num_perm=num_perm)
    secs = time.time() - s1
    if secs > 60:
        mins = int(secs) / 60
        secs = secs % 60
        print 'Took %d mins and %0.1f seconds.' % (mins, secs)
    else:
        print 'Took %0.1f seconds.' % (secs)

    perm_res['min_ps'].sort()
    perm_res['max_f_stats'].sort()
    # reverse array (the bare slice expression alone was a no-op)
    perm_res['max_f_stats'] = perm_res['max_f_stats'][::-1]
    five_perc_i = int(num_perm / 20)
    print "The 0.05 genome-wide significance threshold is %0.4e, and the corresponding statistic is %0.4e." % (
        perm_res['min_ps'][five_perc_i], perm_res['max_f_stats'][five_perc_i])
    oh5f.create_dataset('perm_min_ps', data=perm_res['min_ps'])
    oh5f.create_dataset('perm_max_f_stats', data=perm_res['max_f_stats'])
    oh5f.create_dataset('five_perc_perm_min_ps', data=perm_res['min_ps'][five_perc_i])
    oh5f.create_dataset('five_perc_perm_max_f_stats', data=perm_res['max_f_stats'][five_perc_i])
    ih5f.close()
    oh5f.close()
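# Hedged sketch of how the permutation threshold above is read off: after
# sorting the per-permutation minimum p-values, the value at index
# num_perm / 20 is the empirical 5% family-wise significance threshold.
# The uniform draws below stand in for real permutation minima.
import numpy as np

rng = np.random.RandomState(2)
num_perm = 500
min_ps = np.sort(rng.uniform(1e-8, 1e-4, size=num_perm))
five_perc_i = int(num_perm / 20)           # index 25 -> 5th percentile
threshold = min_ps[five_perc_i]
print('0.05 genome-wide threshold: %0.4e' % threshold)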
def parameters(cmdargs):
    """
    cmdargs:
        -q, qubits
        -k, lrule
        -f, nmems
    """

    # The Hopfield parameters
    hparams = {
        'numNeurons': cmdargs['qubits'],
        'inputState': [2 * sp.random.random_integers(0, 1) - 1
                       for k in xrange(cmdargs['qubits'])],
        'learningRule': cmdargs['simtype'],
        'numMemories': int(cmdargs['farg'])
    }

    # Construct memories
    memories = [[2 * sp.random.random_integers(0, 1) - 1
                 for k in xrange(hparams['numNeurons'])]
                for j in xrange(hparams['numMemories'])]

    # At least one pattern must be one Hamming unit away from the input
    memories[0] = list(hparams['inputState'])
    memories[0][sp.random.random_integers(0, hparams['numNeurons'] - 1)] *= -1

    # Make sure all other patterns are not the input state
    def hamdist(a, b):
        """ Calculate Hamming distance. """
        return sp.sum(abs(sp.array(a) - sp.array(b)) / 2.0)

    # Loop over additional memories, if there are any
    for imem, mem in enumerate(memories[1:]):
        while hamdist(mem, hparams['inputState']) < 1.0:
            # Flip a random spin
            rndbit = sp.random.random_integers(0, hparams['numNeurons'] - 1)
            memories[imem + 1][rndbit] *= -1

    # Basic simulation params
    nQubits = hparams['numNeurons']
    T = 1000.0  # sp.arange(0.1, 15, 0.5)
    # T = sp.array([10.0, 20.0, 50.0, 100.0, 500.0,
    #               1000.0, 5000.0, 10000.0, 50000.0])
    dt = 0.005 * T

    # Define states for which to track probabilities in time
    # import statelabels
    # label_list = statelabels.GenerateLabels(nQubits)
    # stateoverlap = []
    # for mem in memories:
    #     # Convert spins to bits
    #     bitstr = ''.join(['0' if k == 1 else '1' for k in mem])
    #     # Get the index of the current (converted) memory and add it to list
    #     stateoverlap.append([label_list.index(bitstr), bitstr])
    stateoverlap = None

    # Output parameters
    binary = 1  # Save output files as binary Numpy format
    progressout = 0  # Output simulation progress over anneal timesteps

    eigspecdat = 1  # Output data for eigspec
    eigspecplot = 0  # Plot eigspec
    eigspecnum = 2 ** nQubits  # Number of eigenvalues to output
    fidelplot = 0  # Plot fidelity
    fideldat = 0  # Output fidelity data
    fidelnumstates = 2 ** nQubits  # Check fidelity with this number of eigenstates
    overlapdat = 0  # Output overlap data
    overlapplot = 0  # Plot overlap
    solveMethod = 'ExpPert'  # 'ExpPert', 'SuzTrot', 'ForRuth', 'BCM'

    # Output directory stuff
    probdir = 'data/hopfield_exp3_nodiag/n' + str(nQubits) + 'p' + \
        str(hparams['numMemories']) + hparams['learningRule']
    if isinstance(T, collections.Iterable):
        probdir += 'MultiT'
    if os.path.isdir(probdir):
        outlist = sorted([int(name) for name in os.listdir(probdir)
                          if name.isdigit()])
    else:
        outlist = []
    outnum = outlist[-1] + 1 if outlist else 0
    outputdir = probdir + '/' + str(outnum) + '/'

    probshow = 0  # Print final state probabilities to screen
    probout = 1  # Output probabilities to file
    mingap = 0  # Record the minimum spectral gap

    errchk = 0  # Error-checking on/off (for simulation accuracy)
    eps = 0.01  # Numerical error in normalization condition (1 - norm < eps)

    # Specify a QUBO (convert to Ising = True), or alpha, beta directly
    # (convert = False), and also specify the signs on the Ising Hamiltonian
    # terms (you can specify coefficients too for some problems if needed)
    isingConvert = 0
    isingSigns = {'hx': -1, 'hz': -1, 'hzz': -1}

    # Construct network Ising parameters
    neurons = nQubits

    # This is gamma, the appropriate weighting on the input vector
    # isingSigns['hz'] *= 1 - (len(hparams['inputState']) -
    #                          hparams['inputState'].count(0))/(2*neurons)
    # isingSigns['hz'] *= 1.0/(5*neurons)
    # isingSigns['hz'] *= 0.2

    alpha = sp.array(hparams['inputState'])
    beta = sp.zeros((neurons, neurons))
    delta = sp.array([])

    # Construct the memory matrix according to a learning rule
    if hparams['learningRule'] == 'hebb':
        # Hebb rule
        isingSigns['hz'] *= 0.5
        memMat = sp.matrix(memories).T
        beta = sp.triu(memMat * memMat.T) / float(neurons)
    elif hparams['learningRule'] == 'stork':
        # Storkey rule
        isingSigns['hz'] *= 0.15
        Wm = sp.zeros((neurons, neurons))
        for m, mem in enumerate(memories):
            Am = sp.outer(mem, mem) - sp.eye(neurons)
            Wm += (Am - Am * Wm - Wm * Am) / float(neurons)
        beta = sp.triu(Wm)
    elif hparams['learningRule'] == 'proj':
        # Moore-Penrose pseudoinverse rule
        isingSigns['hz'] *= 0.15
        memMat = sp.matrix(memories).T
        beta = sp.triu(memMat * sp.linalg.pinv(memMat))
        sp.fill_diagonal(beta, 0.0)

    # Some outputs
    outputs = {
        'nQubits': nQubits,
        'learningRule': hparams['learningRule'],
        'outdir': probdir,
        'inputState': hparams['inputState'],
        'memories': memories,
        'answer': memories[0],
        'annealTime': list(T) if isinstance(T, collections.Iterable) else T
    }

    ############################################################################
    ######## All variables must be specified here, do NOT change the keys ######
    ############################################################################
    return {
        'nQubits': nQubits,
        'Q': None,
        'T': T,
        'dt': dt,
        'outputdir': outputdir,
        'errchk': errchk,
        'eps': eps,
        'isingConvert': isingConvert,
        'isingSigns': isingSigns,
        'outputs': outputs,
        'alpha': alpha,
        'beta': beta,
        'delta': delta,
        'eigdat': eigspecdat,
        'eigplot': eigspecplot,
        'eignum': eigspecnum,
        'fiddat': fideldat,
        'fidplot': fidelplot,
        'fidnumstates': fidelnumstates,
        'overlapdat': overlapdat,
        'overlapplot': overlapplot,
        'outdir': outputdir,
        'binary': binary,
        'progressout': progressout,
        'probshow': probshow,
        'probout': probout,
        'mingap': mingap,
        'stateoverlap': stateoverlap,
        'hzscale': None,
        'hzzscale': None,
        'hxscale': None,
        'solveMethod': solveMethod
    }
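# Hedged sketch of the Hebb-rule weight construction used above, on a toy
# 4-neuron network with 2 illustrative memories: the coupling matrix is
# the upper-triangular half of the averaged outer product of the stored
# +/-1 patterns.
import numpy as np

memories = np.array([[1, -1, 1, -1],
                     [1, 1, -1, -1]])
neurons = memories.shape[1]
memMat = memories.T                              # neurons x patterns
beta = np.triu(memMat.dot(memMat.T)) / float(neurons)
print(beta)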