def diag_Ctilde_o_Sr(self, i):
    if i < self.Cg.getNumberParams():
        r = sp.kron(sp.diag(self.LcGradCgLc(i)), self.Sr())
    else:
        _i = i - self.Cg.getNumberParams()
        r = sp.kron(sp.diag(self.LcGradCnLc(_i)), sp.ones(self.dim_r))
    return r
def __init__(self, data, cov_matrix=False, loc=None):
    """Parameters
    ----------
    data : array of data, shape=(number points, number dim)
        If cov_matrix is True then data is the covariance matrix (see below)

    Keywords
    --------
    cov_matrix : bool (optional)
        If True, data is treated as a covariance matrix with
        shape=(number dim, number dim)
    loc : the mean of the data if a covariance matrix is given,
        shape=(number dim)
    """
    if cov_matrix:
        self.dim = data.shape[0]
        self.n = None
        self.data_t = None
        self.mu = loc
        self.evec, evals, V = sl.svd(data, full_matrices=False)
        self.sigma = sqrt(evals)
        self.Sigma = diag(1. / self.sigma)
        self.B = dot(self.evec, self.Sigma)
        self.Binv = sl.inv(self.B)
    else:
        self.n, self.dim = data.shape  # the shape of the input data
        self.mu = data.mean(axis=0)    # the mean of the data
        self.data_t = data - self.mu   # remove the mean
        # get the eigenvectors (axes of the ellipsoid)
        self.evec, evals, V = sl.svd(self.data_t.T, full_matrices=False)
        # project the data onto the eigenvectors
        data_p = dot(self.data_t, self.evec)
        # get the spread of the distribution (the axis ratios for the ellipsoid)
        self.sigma = data_p.std(axis=0)
        # the eigenvalue matrix for the ellipsoid equation
        self.Sigma = diag(1. / self.sigma)
        self.B = dot(self.evec, self.Sigma)  # used in the ellipsoid equation
        self.Binv = sl.inv(self.B)           # also useful to have around
def build_sample_nn():
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])
    tstdata_temp, trndata_temp = alldata.splitWithProportion(0.25)

    tstdata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(0, tstdata_temp.getLength()):
        tstdata.addSample(tstdata_temp.getSample(n)[0], tstdata_temp.getSample(n)[1])

    trndata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(0, trndata_temp.getLength()):
        trndata.addSample(trndata_temp.getSample(n)[0], trndata_temp.getSample(n)[1])

    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                              verbose=True, weightdecay=0.01)
    return trainer, fnn, tstdata
def _LMLgrad_lik(self, hyperparams):
    """derivative of the likelihood parameters"""
    logtheta = hyperparams['covar']
    try:
        KV = self.get_covariances(hyperparams)
    except linalg.LinAlgError:
        LG.error("exception caught (%s)" % (str(hyperparams)))
        return 1E6

    # loop through all dimensions
    # logdet term:
    Kd = 2 * KV['Knoise']
    dldet = 0.5 * (Kd * KV['Si']).sum(axis=0)
    # quadratic term
    y_roti = KV['y_roti']
    dlquad = -0.5 * (y_roti * Kd * y_roti).sum(axis=0)
    if VERBOSE:
        dldet_ = SP.zeros([self.d])
        dlquad_ = SP.zeros([self.d])
        for d in xrange(self.d):
            _K = KV['K'] + SP.diag(KV['Knoise'][:, d])
            _Ki = SP.linalg.inv(_K)
            dldet_[d] = 0.5 * SP.dot(_Ki, SP.diag(Kd[:, d])).trace()
            dlquad_[d] = -0.5 * SP.dot(self.y[:, d], SP.dot(_Ki, SP.dot(SP.diag(Kd[:, d]), SP.dot(_Ki, self.y[:, d]))))
        assert (SP.absolute(dldet - dldet_) < 1E-3).all(), 'ouch'
        assert (SP.absolute(dlquad - dlquad_) < 1E-3).all(), 'ouch'

    LMLgrad = dldet + dlquad
    RV = {'lik': LMLgrad}
    return RV
def orlancz(A, v0, k):
    """
    full orthogonalized Lanczos algorithm (Krylov approximation of a matrix)
        input:
            A: matrix to approximate
            v0: initial vector (should be in matrix form)
            k: number of Krylov steps
        output:
            V: matrix (large, N*k) containing the orthogonal vectors
            H: matrix (small, k*k) containing the Krylov approximation of A

    Author: Vasile Gradinaru, 21.10.2008 (Zuerich)
    """
    print 'FULL ORTHOGONAL LANCZOS METHOD !'
    from numpy import finfo, sqrt
    reps = 10 * sqrt(finfo(float).eps)
    V = mat(v0.copy() / norm(v0))
    alpha = zeros(k)
    beta = zeros(k + 1)
    for m in xrange(k):
        #vt = A * V[:, m]
        vt = multMv(A, V[:, m])
        if m > 0:
            vt -= beta[m] * V[:, m - 1]
        alpha[m] = (V[:, m].H * vt)[0, 0]
        vt -= alpha[m] * V[:, m]
        beta[m + 1] = norm(vt)
        # reorthogonalization
        h1 = multMv(V.H, vt)
        vt -= multMv(V, h1)
        if norm(h1) > reps:
            vt -= multMv(V, (multMv(V.H, vt)))
        V = hstack((V, vt.copy() / beta[m + 1]))
    rbeta = beta[1:-1]
    H = diag(alpha) + diag(rbeta, 1) + diag(rbeta, -1)
    return V, H
def lanczos(A, v0, k):
    """
    Lanczos algorithm (Krylov approximation of a matrix)
        input:
            A: matrix to approximate
            v0: initial vector (should be in matrix form)
            k: number of Krylov steps
        output:
            V: matrix (large, N*k) containing the orthogonal vectors
            H: matrix (small, k*k) containing the Krylov approximation of A

    Author: Vasile Gradinaru, 14.12.2007 (Rennes)
    """
    print 'LANCZOS METHOD !'
    V = mat(v0.copy() / norm(v0))
    alpha = zeros(k)
    beta = zeros(k + 1)
    for m in xrange(k):
        vt = multMv(A, V[:, m])
        #vt = A * V[:, m]
        if m > 0:
            vt -= beta[m] * V[:, m - 1]
        alpha[m] = (V[:, m].H * vt)[0, 0]
        vt -= alpha[m] * V[:, m]
        beta[m + 1] = norm(vt)
        V = hstack((V, vt.copy() / beta[m + 1]))
    rbeta = beta[1:-1]
    H = diag(alpha) + diag(rbeta, 1) + diag(rbeta, -1)
    return V, H
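# Hedged usage sketch (added, not from the original source): exercises
# lanczos() on a small SPD matrix and checks that the Ritz values of the
# tridiagonal H approximate A's spectrum. It assumes the module-level helper
# multMv(M, v) used above computes the plain matrix-vector product M*v.
def _demo_lanczos():
    import numpy as np
    A = np.mat(np.diag(np.arange(1.0, 11.0)))  # SPD test matrix, eigenvalues 1..10
    v0 = np.mat(np.ones((10, 1)))              # starting vector in matrix form
    V, H = lanczos(A, v0, 8)
    # Ritz values (eigenvalues of the small tridiagonal H) should approach
    # the extreme eigenvalues of A first
    print(np.sort(np.linalg.eigvalsh(H)))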
def LMLdebug(self):
    """
    LML function for debug
    """
    assert self.N * self.P < 5000, 'gp2kronSum:: N*P>=5000'

    y = SP.reshape(self.Y, (self.N * self.P), order='F')
    V = SP.kron(SP.eye(self.P), self.F)

    XX = SP.dot(self.Xr, self.Xr.T)
    K = SP.kron(self.Cr.K(), XX)
    K += SP.kron(self.Cn.K() + self.offset * SP.eye(self.P), SP.eye(self.N))

    # inverse of K
    cholK = LA.cholesky(K)
    Ki = LA.cho_solve((cholK, False), SP.eye(self.N * self.P))

    # Areml and inverse
    Areml = SP.dot(V.T, SP.dot(Ki, V))
    cholAreml = LA.cholesky(Areml)
    Areml_i = LA.cho_solve((cholAreml, False), SP.eye(self.K * self.P))

    # effect sizes and z
    b = SP.dot(Areml_i, SP.dot(V.T, SP.dot(Ki, y)))
    z = y - SP.dot(V, b)
    Kiz = SP.dot(Ki, z)

    # lml
    lml = y.shape[0] * SP.log(2 * SP.pi)
    lml += 2 * SP.log(SP.diag(cholK)).sum()
    lml += 2 * SP.log(SP.diag(cholAreml)).sum()
    lml += SP.dot(z, Kiz)
    lml *= 0.5

    return lml
def newEpisode(self):
    if self.learning:
        params = ravel(self.explorationlayer.module.params)
        target = ravel(sum(self.history.getSequence(self.history.getNumSequences() - 1)[2]) / 500)
        if target != 0.0:
            self.gp.addSample(params, target)
            if len(self.gp.trainx) > 20:
                self.gp.trainx = self.gp.trainx[-20:, :]
                self.gp.trainy = self.gp.trainy[-20:]
                self.gp.noise = self.gp.noise[-20:]
            self.gp._calculate()

            # get new parameters where mean was highest
            max_cov = diag(self.gp.pred_cov).max()
            indices = where(diag(self.gp.pred_cov) == max_cov)[0]
            pick = indices[random.randint(len(indices))]
            new_param = self.gp.testx[pick]

            # check if that one exists already in gp training set
            if len(where(self.gp.trainx == new_param)[0]) > 0:
                # add some normal noise to it
                new_param += random.normal(0, 1, len(new_param))

            self.explorationlayer.module._setParameters(new_param)
        else:
            self.explorationlayer.drawRandomWeights()

    # don't call StateDependentAgent.newEpisode() because it randomizes the params
    LearningAgent.newEpisode(self)
def get_whitener(A, k):
    """Return the matrix W that whitens A, i.e. W^T A W = I. Assumes A is k-rank"""
    U, D, V = svdk(A, k)
    Ds = sqrt(D)
    Di = 1. / Ds
    return U.dot(diag(Di)), U.dot(diag(Ds))
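# Hedged sanity check (added, not from the original source): verifies the two
# whitening identities for this get_whitener. It assumes svdk(A, k) is the
# rank-k truncated SVD helper used elsewhere in this codebase, returning
# (U, D, V) with D a 1-D array of the top-k singular values.
def _demo_get_whitener():
    import numpy as np
    rng = np.random.RandomState(0)
    B = rng.randn(5, 3)
    A = B.dot(B.T)                # rank-3 PSD matrix
    W, Wt = get_whitener(A, 3)
    assert np.allclose(W.T.dot(A).dot(W), np.eye(3))  # W^T A W = I
    assert np.allclose(Wt.dot(Wt.T), A)               # Wt reconstructs A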
def segmented():
    radius = 5
    sigmaI = 0.02
    sigmaX = 3.0
    height = img.shape[0]
    width = img.shape[1]
    flatImg = img.flatten()
    darkImg = flatImg
    brightImg = flatImg
    nodes = img.flatten()

    W = spar.lil_matrix((nodes.size, nodes.size), dtype=float)
    D = sp.zeros((1, nodes.size))

    for row in range(height):
        for col in range(width):
            for k in range(row - radius, row + radius):
                for l in range(col - radius, col + radius):
                    try:
                        w = weight(row, col, k, l)
                        W[row * width + col, k * width + l] = w
                        D[0, row * width + col] += w
                    except:
                        continue

    D = spar.spdiags(D, 0, nodes.size, nodes.size)
    Q = D - W

    D1 = D.todense()
    Q1 = Q.todense()

    diags = sp.diag(D1)
    DminusHalf = sp.diag(diags**-0.5)
    segQ = sp.dot(sp.dot(DminusHalf, Q1), DminusHalf)
    vals, vecs = la.eig(segQ)

    vecind = sp.argsort(vals)[1]
    theVec = vecs[vecind]

    for i in range(0, height**2):
        if theVec[i] < 0:
            darkImg[i] = 0.0
        else:
            brightImg[i] = 0.0

    darkImg = sp.reshape(darkImg, (height, height))
    brightImg = sp.reshape(brightImg, (height, height))

    return darkImg, flatImg, brightImg
def MikotaPair(n):
    # Mikota pair acts as a nice test since the eigenvalues
    # are the squares of the integers n, n=1,2,...
    x = arange(1, n + 1)
    B = diag(1. / x)
    y = arange(n - 1, 0, -1)
    z = arange(2 * n - 1, 0, -2)
    A = diag(z) - diag(y, -1) - diag(y, 1)
    return A, B
def MikotaPair(n, dtype=np.dtype("d")):
    # Mikota pair acts as a nice test since the eigenvalues
    # are the squares of the integers n, n=1,2,...
    x = np.arange(1, n + 1, dtype=dtype)
    B = diag(1. / x)
    y = np.arange(n - 1, 0, -1, dtype=dtype)
    z = np.arange(2 * n - 1, 0, -2, dtype=dtype)
    A = diag(z) - diag(y, -1) - diag(y, 1)
    return A, B
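# Hedged check (added, not from the original source): the defining property
# of the Mikota pair is that the generalized eigenvalues of (A, B) are the
# squared integers 1^2, 2^2, ..., n^2.
def _demo_mikota(n=6):
    import numpy as np
    from scipy.linalg import eigh
    A, B = MikotaPair(n)
    w = eigh(A, B, eigvals_only=True)
    assert np.allclose(np.sort(w), np.arange(1, n + 1)**2)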
def exact_moments(A, w):
    """Get the exact moments of a components distribution"""
    k = len(w)
    P = A.dot(diag(w)).dot(A.T)
    #T = sum( [ w[i] * tensorify( A.T[i], A.T[i], A.T[i] ) for i in xrange( k ) ] )
    T = lambda theta: A.dot(diag(w)).dot(diag(A.T.dot(theta))).dot(A.T)
    return P, T
def main():
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])
    tstdata, trndata = alldata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()

    print "Number of training patterns: ", len(trndata)
    print "Input and output dimensions: ", trndata.indim, trndata.outdim
    print "First sample (input, target, class):"
    print trndata['input'][0], trndata['target'][0], trndata['class'][0]

    fnn = buildNetwork(trndata.indim, 5, trndata.outdim, outclass=SoftmaxLayer)
    trainer = BackpropTrainer(fnn, dataset=trndata, momentum=0.1,
                              verbose=True, weightdecay=0.01)

    ticks = arange(-3., 6., 0.2)
    X, Y = meshgrid(ticks, ticks)
    # need column vectors in dataset, not arrays
    griddata = ClassificationDataSet(2, 1, nb_classes=3)
    for i in xrange(X.size):
        griddata.addSample([X.ravel()[i], Y.ravel()[i]], [0])
    griddata._convertToOneOfMany()  # this is still needed to make the fnn feel comfy

    for i in range(20):
        trainer.trainEpochs(1)
        trnresult = percentError(trainer.testOnClassData(), trndata['class'])
        tstresult = percentError(trainer.testOnClassData(dataset=tstdata), tstdata['class'])

        print "epoch: %4d" % trainer.totalepochs, \
              " train error: %5.2f%%" % trnresult, \
              " test error: %5.2f%%" % tstresult

        out = fnn.activateOnDataset(griddata)
        out = out.argmax(axis=1)  # the highest output activation gives the class
        out = out.reshape(X.shape)

        figure(1)
        ioff()      # interactive graphics off
        clf()       # clear the plot
        hold(True)  # overplot on
        for c in [0, 1, 2]:
            here, _ = where(tstdata['class'] == c)
            plot(tstdata['input'][here, 0], tstdata['input'][here, 1], 'o')
        if out.max() != out.min():  # safety check against flat field
            contourf(X, Y, out)     # plot the contour
        ion()   # interactive graphics on
        draw()  # update the plot

    ioff()
    show()
def exact_moments(alphas, topics):
    """Get the exact moments of a components distribution"""
    a0 = alphas.sum()
    O = topics
    P = 1 / ((a0 + 1) * a0) * O.dot(diag(alphas)).dot(O.T)
    T = lambda theta: 2 / ((a0 + 2) * (a0 + 1) * a0) * O.dot(diag(O.T.dot(theta))).dot(diag(alphas)).dot(O.T)
    return P, T
def get_dummy_data():
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    X = []
    y = []
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            X.append(input)
            y.append(klass)
    return X, y
def _logDerivsFactorSigma(self, samples, mu, invSigma, factorSigma):
    """ Compute the log-derivatives w.r.t. the factorized covariance matrix components.
    This implementation should be faster than the one in Vanilla. """
    res = zeros((len(samples), self.numDistrParams - self.numParameters))
    invA = inv(factorSigma)
    diagInvA = diag(diag(invA))
    for i, sample in enumerate(samples):
        s = dot(invA.T, (sample - mu))
        R = outer(s, dot(invA, s)) - diagInvA
        res[i] = triu2flat(R)
    return res
def diag_Ctilde_o_Sr(self, i):
    np_r = self.Cr.getNumberParams()
    np_g = self.Cg.getNumberParams()
    if i < np_r:
        r = sp.kron(sp.diag(self.LcGradCrLc(i)), self.diagWrWr())
    elif i < (np_r + np_g):
        _i = i - np_r
        r = sp.kron(sp.diag(self.LcGradCgLc(_i)), self.Sr())
    else:
        _i = i - np_r - np_g
        r = sp.kron(sp.diag(self.LcGradCnLc(_i)), sp.ones(self.dim_r))
    return r
def _update_cache(self):
    """
    Update cache
    """
    cov_params_have_changed = self.Cr.params_have_changed or self.Cn.params_have_changed

    if self.Xr_has_changed:
        start = TIME.time()
        """ Row SVD Bg + Noise """
        Urstar, S, V = NLA.svd(self.Xr)
        self.cache['Srstar'] = SP.concatenate([S**2, SP.zeros(self.N - S.shape[0])])
        self.cache['Lr'] = Urstar.T
        self.mean.setRowRotation(Lr=self.cache['Lr'])

        smartSum(self.time, 'cache_XXchanged', TIME.time() - start)
        smartSum(self.count, 'cache_XXchanged', 1)

    if cov_params_have_changed:
        start = TIME.time()
        """ Col SVD Noise """
        S2, U2 = LA.eigh(self.Cn.K() + self.offset * SP.eye(self.P))
        self.cache['Sc2'] = S2
        US2 = SP.dot(U2, SP.diag(SP.sqrt(S2)))
        USi2 = SP.dot(U2, SP.diag(SP.sqrt(1. / S2)))

        """ Col SVD region """
        A = SP.reshape(self.Cr.getParams(), (self.P, self.rank), order='F')
        Astar = SP.dot(USi2.T, A)
        Ucstar, S, V = NLA.svd(Astar)
        self.cache['Scstar'] = SP.concatenate([S**2, SP.zeros(self.P - S.shape[0])])
        self.cache['Lc'] = SP.dot(Ucstar.T, USi2.T)

        """ pheno """
        self.mean.setColRotation(self.cache['Lc'])

    if cov_params_have_changed or self.Xr_has_changed:
        """ S """
        self.cache['s'] = SP.kron(self.cache['Scstar'], self.cache['Srstar']) + 1
        self.cache['d'] = 1. / self.cache['s']
        self.cache['D'] = SP.reshape(self.cache['d'], (self.N, self.P), order='F')

        """ pheno """
        self.cache['LY'] = self.mean.evaluate()
        self.cache['DLY'] = self.cache['D'] * self.cache['LY']

        smartSum(self.time, 'cache_colSVDpRot', TIME.time() - start)
        smartSum(self.count, 'cache_colSVDpRot', 1)

    self.Y_has_changed = False
    self.Xr_has_changed = False
    self.Cr.params_have_changed = False
    self.Cn.params_have_changed = False
def GetEigSys(filename, gsfile=None, Nsamp=1, channel=None, wavefile=None, q=None):
    if type(filename) == str:
        filename = [filename]
    hfile = h5py.File(filename[0], 'r')
    attr = GetAttr(filename[0])
    if channel is None:
        channel = attr['channel']
    dpath, args = GetStat(filename, Nsamp)
    dat = sc.array(hfile["/rank-1/data-0"])
    hfile.close()
    N = int(sc.shape(dat)[0] / 2)
    L = attr['L']
    shift = None
    if 'phasex' in attr.keys():
        shift = [attr['phasex'] / 2.0, attr['phasey'] / 2.0]
    else:
        shift = [attr['phase_shift_x'] / 2.0, attr['phase_shift_y'] / 2.0]
    H = sc.zeros([Nsamp, N, N], complex)
    O = sc.zeros([Nsamp, N, N], complex)
    E = sc.zeros([Nsamp, N])
    V = sc.zeros([Nsamp, N, N], complex)
    for sample, b in enumerate(args):
        for d in b:
            hfile = h5py.File(dpath[d][0], 'r')
            dat = hfile[dpath[d][1]]
            H[sample, :, :] += dat[0:N, 0:2 * N:2] + 1j * dat[0:N, 1:2 * N:2]
            O[sample, :, :] += dat[N:2 * N, 0:2 * N:2] + 1j * dat[N:2 * N, 1:2 * N:2]
            hfile.close()
        H[sample, :, :] = 0.5 * (H[sample, :, :] + sc.conj(H[sample, :, :].T)) / len(b)
        O[sample, :, :] = 0.5 * (O[sample, :, :] + sc.conj(O[sample, :, :].T)) / len(b)
    if channel == 'groundstate':
        return H
    fs = None
    refstate = sc.zeros(2 * L * L)
    refstate[0::2] = 1
    if wavefile is None:
        fs = GetFermiSigns(filename[0], refstate, channel=channel)
    else:
        fs = GetFermiSigns(wavefile, refstate, channel=channel)
    for s in range(sc.shape(H)[0]):
        H[s, :, :] = sc.dot(sc.diag(fs), sc.dot(H[s, :, :], sc.diag(fs)))
        O[s, :, :] = sc.dot(sc.diag(fs), sc.dot(O[s, :, :], sc.diag(fs)))
    ren = sc.ones(Nsamp)
    if gsfile is not None:
        ren = RenormalizeFactor(filename, gsfile, Nsamp=1, channel=channel, O=O, q=q)
    print('{0} pair of (H,O) matrices loaded, now diagonalize'.format(sc.shape(H)[0]))
    H = sc.einsum('ijk,i->ijk', H, ren)
    O = sc.einsum('ijk,i->ijk', O, ren)
    for s in range(sc.shape(H)[0]):
        E[s, :], V[s, :, :] = vln.geneigh(sc.squeeze(H[s, :, :]), sc.squeeze(O[s, :, :]))
    print('diagonalization finished')
    return H, O, E, V
def ampfit(data, covariance, theory, rank_thresh=1e-12, diag_only=False):
    """Fits the amplitude of the theory curve to the data.

    Finds `amp` such that `amp`*`theory` is the best fit to `data`.

    Returns
    -------
    amp : float
        Fitted amplitude.
    error : float
        Error on fitted amplitude.
    """
    data = sp.asarray(data)
    covariance = sp.asarray(copy.deepcopy(covariance))
    theory = sp.asarray(theory)
    if len(data.shape) != 1:
        raise ValueError("`data` must be a 1D vector.")
    n = len(data)
    if data.shape != theory.shape:
        raise ValueError("`theory` must be the same shape as `data`.")
    if covariance.shape != (n, n):
        msg = "`covariance` must be a square matrix compatible with data."
        raise ValueError(msg)
    if diag_only:
        covariance = sp.diag(sp.diag(covariance))

    print data
    print sp.diag(sp.sqrt(covariance))

    covariance_inverse = linalg.inv(covariance)
    weighted_data = sp.dot(covariance_inverse, data)
    amp = sp.dot(theory, weighted_data)
    normalization = sp.dot(covariance_inverse, theory)
    normalization = sp.dot(theory, normalization)
    amp /= normalization
    error = sp.sqrt(1 / normalization)

    u, s, v = np.linalg.svd(covariance)
    dof = np.sum(s > rank_thresh)
    resid = data - amp * theory
    chi2 = sp.dot(covariance_inverse, resid)
    chi2 = sp.dot(resid, chi2)
    pte = sp.stats.chisqprob(chi2, dof - 1)

    return {"amp": amp, "error": error,
            "chi2": chi2, "dof": dof - 1,
            "pte": pte}
def create_dataset():
    '''Create a random dataset to train and test the network on
    '''
    means = [(-1, 0), (2, 4), (3, 1)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    alldata = ClassificationDataSet(2, 1, nb_classes=3)
    for n in xrange(400):
        for klass in range(3):
            input = multivariate_normal(means[klass], cov[klass])
            alldata.addSample(input, [klass])
    tstdata, trndata = alldata.splitWithProportion(0.25)
    trndata._convertToOneOfMany()
    tstdata._convertToOneOfMany()
    return (trndata, tstdata)
def derivative_matrix(G, grid_spacing=1.0):
    '''
    Returns a (G-1)xG dimensional derivative matrix.
    '''
    # Make sure G is a positive integer
    assert (G == int(G) and G >= 2)

    # Create matrix
    tmp_mat = sp.diag(sp.ones(G), 0) + sp.diag(-1.0 * sp.ones(G - 1), -1)
    right_partial = tmp_mat[1:, :] / grid_spacing
    return sp.mat(right_partial)
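# Hedged usage sketch (added, not from the original source): applying the
# (G-1)xG forward-difference matrix to a linear ramp recovers its constant
# slope exactly.
def _demo_derivative_matrix():
    import numpy as np
    G, h = 5, 0.5
    D = derivative_matrix(G, grid_spacing=h)
    f = np.arange(G) * h          # f(x) = x sampled on the grid
    df = np.asarray(D).dot(f)     # (G-1) forward differences divided by h
    assert np.allclose(df, 1.0)   # d/dx x = 1 everywhere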
def get_whitener(A, k):
    """Return the matrix W that whitens A, i.e. W^T A W = I. Assumes A is k-rank"""
    assert (mrank(A) == k)
    # If A is PSD
    U, S, _ = svdk(A, k)
    W, Wt = U.dot(diag(sc.sqrt(S)**-1)), (diag(sc.sqrt(S))).dot(U.T)

    # assert( sc.allclose( W.T.dot( A ).dot( W ), sc.eye( k ) ) )
    # assert( sc.allclose( Wt.T.dot( Wt ), A ) )

    return W, Wt
def generateClassificationData(size, nClasses=3):
    """ generate a set of points in 2D belonging to two or three different classes """
    if nClasses == 3:
        means = [(-1, 0), (2, 4), (3, 1)]
    else:
        means = [(-2, 0), (2, 1), (6, 0)]
    cov = [diag([1, 1]), diag([0.5, 1.2]), diag([1.5, 0.7])]
    dataset = ClassificationDataSet(2, 1, nb_classes=nClasses)
    for _ in xrange(size):
        for c in range(3):
            input = multivariate_normal(means[c], cov[c])
            dataset.addSample(input, [c % nClasses])
    dataset.assignClasses()
    return dataset
def gap(data, refs=None, nrefs=20, ks=range(1, 11), method=None):
    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops - bots))
        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i] * dists + bots
    else:
        rands = refs

    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        g1 = method(n_clusters=k).fit(data)
        (kmc, kml) = (g1.cluster_centers_, g1.labels_)
        disp = sum([euclidean(data[m, :], kmc[kml[m], :]) for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            g2 = method(n_clusters=k).fit(rands[:, :, j])
            (kmc, kml) = (g2.cluster_centers_, g2.labels_)
            refdisps[j] = sum([euclidean(rands[m, :, j], kmc[kml[m], :]) for m in range(shape[0])])

        gaps[i] = scipy.log(scipy.mean(refdisps)) - scipy.log(disp)
    return gaps
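# Hedged usage sketch (added, not from the original source): runs this gap
# statistic with scikit-learn's KMeans passed as `method` on three
# well-separated blobs; the gap curve should peak at (or near) the true
# k = 3. Assumes `euclidean` is bound at module level
# (scipy.spatial.distance.euclidean), as the body above implies.
def _demo_gap():
    import numpy as np
    from sklearn.cluster import KMeans
    rng = np.random.RandomState(0)
    data = np.vstack([rng.randn(30, 2) + c for c in ([0, 0], [8, 8], [0, 8])])
    gaps = gap(data, nrefs=5, ks=range(1, 6), method=KMeans)
    print(np.argmax(gaps) + 1)   # expected: 3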
def _LML_covar(self, hyperparams):
    """
    log marginal likelihood contributions from covariance hyperparameters
    """
    try:
        KV = self.get_covariances(hyperparams)
    except linalg.LinAlgError:
        LG.error("exception caught (%s)" % (str(hyperparams)))
        return 1E6

    # all in one go
    # negative log marginal likelihood, see derivations
    lquad = 0.5 * (KV['y_rot'] * KV['Si'] * KV['y_rot']).sum()
    ldet = 0.5 * -SP.log(KV['Si'][:, :]).sum()
    LML = 0.5 * self.n * self.d * SP.log(2 * SP.pi) + lquad + ldet

    if VERBOSE:
        # 1. slow and explicit way
        lmls_ = SP.zeros([self.d])
        for i in xrange(self.d):
            _y = self.y[:, i]
            sigma2 = SP.exp(2 * hyperparams['lik'])
            _K = KV['K'] + SP.diag(KV['Knoise'][:, i])
            _Ki = SP.linalg.inv(_K)
            lquad_ = 0.5 * SP.dot(_y, SP.dot(_Ki, _y))
            ldet_ = 0.5 * SP.log(SP.linalg.det(_K))
            lmls_[i] = 0.5 * self.n * SP.log(2 * SP.pi) + lquad_ + ldet_
        assert SP.absolute(lmls_.sum() - LML) < 1E-3, 'ouch'

    return LML
def initialize(self, data, random=False):
    self.data = data
    self.n_dim = data.shape[1]
    if random:
        mins = sp.zeros(self.n_dim)
        maxes = sp.zeros(self.n_dim)
        sds = sp.zeros(self.n_dim)
        centers = sp.zeros((self.n_components, self.n_dim))
        for i in xrange(self.n_dim):
            mins[i] = min(self.data[:, i])
            maxes[i] = max(self.data[:, i])
            sds[i] = sp.std(self.data[:, i])
            centers[:, i] = sp.random.uniform(mins[i], maxes[i], self.n_components)
        self.comp = sp.ones(self.n_components) / float(self.n_components) + \
            sp.random.uniform(-1. / self.n_components, 1. / self.n_components, self.n_components)
        self.comp /= sp.sum(self.comp)
        covars = sp.array([sp.diag(sds**2) for i in xrange(self.n_components)])
        self.centers = centers
        self.covars = covars
    else:
        clust = cluster.KMeans(self.n_components)
        clust.fit(self.data)
        self.centers = sp.copy(clust.cluster_centers_)
        labels = sp.copy(clust.labels_)
        self.covars = sp.zeros((self.n_components, self.n_dim, self.n_dim))
        self.comp = sp.zeros(self.n_components)
        for i in xrange(self.n_components):
            inds = labels == i
            temp = self.data[inds, :]
            self.covars[i, :, :] = sp.dot(temp.T, temp)
            self.comp[i] = sum(inds) / float(self.data.shape[0])
def learn(self, X, t, tol=0.01, amax=1e10):
    u"""Learning"""
    N = X.shape[0]
    a = sp.ones(N + 1)  # hyperparameter
    b = 1.0
    phi = sp.ones((N, N + 1))  # design matrix
    phi[:, 1:] = [[self._kernel(xi, xj) for xj in X] for xi in X]
    diff = 1
    while diff >= tol:
        sigma = spla.inv(sp.diag(a) + b * sp.dot(phi.T, phi))
        m = b * sp.dot(sigma, sp.dot(phi.T, t))
        gamma = sp.ones(N + 1) - a * sigma.diagonal()
        anew = gamma / (m * m)
        bnew = (N - gamma.sum()) / sp.square(spla.norm(t - sp.dot(phi, m)))
        anew[anew >= amax] = amax
        adiff, bdiff = anew - a, bnew - b
        diff = (adiff * adiff).sum() + bdiff * bdiff
        a, b = anew, bnew
        print ".",
    self._a = a
    self._b = b
    self._X = X
    self._m = m
    self._sigma = sigma
    self._amax = amax
def _ar_model_qr(data, p=1):
    """QR factorization for a (multivariate) zero-mean AR model

    :Parameters:
        data : ndarray
            data with observations on the rows and variables on the columns
        p : int or list
            the model order, how many samples to regress over
    """
    # inits
    n, m = data.shape  # observations, channels
    ne = n - p         # number of block equations of size m
    np = m * p         # number of parameter vectors of size m
    K = N.zeros((ne, np + m))  # the lag shifted data matrix

    # compute predictors
    for i in xrange(p):
        K[:, m * i:m * (i + 1)] = data[p - i - 1:n - i - 1, :]
    K[:, np:np + m] = data[p:n, :]

    # condition the matrix and factorize
    scale = N.sqrt(((np + m) ** 2 + np + m + 1) * EPS)
    R = NL.qr(
        N.concatenate((
            K,
            scale * N.diag([NL.norm(K[:, i]) for i in xrange(K.shape[1])])
        )),
        mode='r'
    )

    # return
    del K
    return R
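# Hedged usage sketch (added, not from the original source): QR-compressing a
# bivariate AR(1) process. Assumes this module binds numpy as N,
# numpy.linalg as NL, and defines EPS, exactly as the function body requires.
def _demo_ar_model_qr():
    import numpy as np
    rng = np.random.RandomState(0)
    x = np.zeros((500, 2))
    for t in range(1, 500):
        x[t] = 0.8 * x[t - 1] + rng.randn(2)   # simple AR(1) dynamics
    R = _ar_model_qr(x, p=1)
    print(R.shape)   # compact upper-triangular factor, here (4, 4)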
def ElasticRod(n):
    # Fixed-free elastic rod
    L = 1.0
    le = L/n
    rho = 7.85e3
    S = 1.e-4
    E = 2.1e11
    mass = rho*S*le/6.
    k = E*S/le
    A = k*(diag(r_[2.*ones(n-1), 1])-diag(ones(n-1), 1)-diag(ones(n-1), -1))
    B = mass*(diag(r_[4.*ones(n-1), 2])+diag(ones(n-1), 1)+diag(ones(n-1), -1))
    return A, B
def __getitem__(self, key):
    #print key
    xslice, yslice, zslice = key

    # cut region out of data stack
    dataROI = self.data[xslice, yslice, zslice]

    # average in z
    dataMean = dataROI.mean(2)

    # generate grid to evaluate function on
    X = 1e3 * self.metadata.voxelsize.x * scipy.mgrid[xslice]
    Y = 1e3 * self.metadata.voxelsize.y * scipy.mgrid[yslice]

    # estimate some start parameters...
    A = dataMean.max() - dataMean.min()  # amplitude
    x0 = X.mean()
    y0 = Y.mean()

    startParameters = [A, x0, y0, 250 / 2.35, dataMean.min(), .001, .001]

    # estimate errors in data
    nSlices = dataROI.shape[2]
    sigma = scipy.sqrt(self.metadata.CCD.ReadNoise**2 + (self.metadata.CCD.noiseFactor**2) * self.metadata.CCD.electronsPerCount * dataMean / nSlices) / self.metadata.CCD.electronsPerCount

    # do the fit
    #(res, resCode) = FitModel(f_gauss2d, startParameters, dataMean, X, Y)
    #(res, cov_x, infodict, mesg, resCode) = FitModelWeighted(self.fitfcn, startParameters, dataMean, sigma, X, Y)
    #(res, cov_x, infodict, mesg, resCode) = self.solver(self.fitfcn, startParameters, dataMean, sigma, X, Y)
    (res, ret, cov_x, nIters, resCode) = fitGauss(startParameters, X, Y, dataMean.T.ravel(), 1.0 / sigma.T.ravel())

    fitErrors = None
    try:
        fitErrors = scipy.sqrt(scipy.diag(cov_x))
    except Exception:
        pass

    return GaussianFitResultR(res, self.metadata, (xslice, yslice, zslice), resCode, fitErrors)
def predict(self, hyperparams, Xstar_r, compute_cov=False, debugging=False):
    """
    predict on Xstar
    """
    self._update_inputs(hyperparams)
    KV = self.get_covariances(hyperparams, debugging=debugging)

    self.covar_r.Xcross = Xstar_r

    Kstar_r = self.covar_r.Kcross(hyperparams['covar_r'])
    Kstar_c = self.covar_c.K(hyperparams['covar_c'])
    KinvY = SP.dot(KV['U_r'], SP.dot(KV['Ytilde'], KV['U_c'].T))
    Ystar = SP.dot(Kstar_r.T, SP.dot(KinvY, Kstar_c))
    Ystar = unravel(Ystar, self.covar_r.n_cross, self.t)

    if debugging:
        Kstar = SP.kron(Kstar_c, Kstar_r)
        Ynaive = SP.dot(Kstar.T, KV['alpha'])
        Ynaive = unravel(Ynaive, self.covar_r.n_cross, self.t)
        assert SP.allclose(Ystar, Ynaive), 'ouch, prediction does not work out'

    Ystar_covar = []
    if compute_cov:
        CU = fast_dot(Kstar_c, KV['U_c'])
        s_rev = 1. / KV['S']
        Ystar_covar = SP.zeros([Xstar_r.shape[0], self.Y.shape[1]])
        printProgressBar(0, Xstar_r.shape[0], prefix='Computing prediction variance:',
                         suffix='Complete', length=20)
        for i in range(Xstar_r.shape[0]):
            R_star_star = self.covar_r.K(hyperparams['covar_r'], SP.expand_dims(Xstar_r[i, :], axis=0))
            self.covar_r.Xcross = SP.expand_dims(Xstar_r[i, :], axis=0)
            R_tr_star = self.covar_r.Kcross(hyperparams['covar_r'])
            RU = SP.dot(R_tr_star.T, KV['U_r'])
            q = SP.kron(SP.diag(Kstar_c), R_star_star)
            t = SP.zeros([self.t])
            for j in range(self.t):
                temp = SP.kron(CU[j, :], RU)
                t[j, ] = SP.sum((s_rev * temp).T * temp.T, axis=0)
            Ystar_covar[i, :] = q - t
            if (i + 1) % (Xstar_r.shape[0] / 10) == 0:
                printProgressBar(i + 1, Xstar_r.shape[0], prefix='Computing prediction variance:',
                                 suffix='Complete', length=20)
        self.covar_r.Xcross = Xstar_r
    return Ystar, Ystar_covar
def stable_cho_factor(x, tiny=_TINY):
    """
    NAME:
       stable_cho_factor
    PURPOSE:
       Stable version of the cholesky decomposition
    INPUT:
       x - (sc.array) positive definite matrix
       tiny - (double) tiny number to add to the covariance matrix to make
              the decomposition stable (has a default)
    OUTPUT:
       (L,lowerFlag) - output from scipy.linalg.cho_factor for lower=True
    REVISION HISTORY:
       2009-09-25 - Written - Bovy (NYU)
    """
    return linalg.cho_factor(x + sc.sum(sc.diag(x)) * tiny * sc.eye(x.shape[0]), lower=True)
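# Hedged usage sketch (added, not from the original source): the stabilized
# factor reconstructs x up to the tiny jitter term, assuming _TINY is small
# (on the order of machine precision). Note cho_factor leaves arbitrary
# values above the diagonal, so we take the lower triangle explicitly.
def _demo_stable_cho_factor():
    import numpy as np
    rng = np.random.RandomState(0)
    G = rng.randn(4, 4)
    x = G.dot(G.T)             # positive definite test matrix
    L, lower = stable_cho_factor(x)
    assert lower
    Lt = np.tril(L)
    assert np.allclose(Lt.dot(Lt.T), x, atol=1e-5)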
def invx(X):
    """
    calculates the inverse of X and the log-determinant of X using SVD
    decomposition. This was used to sanity check the robust cholesky
    implementation.
    :param X:
    :return:
    """
    U, S, Vh = sp.linalg.svd(X)
    if min(S) < 1.e-6:
        delta = 1.e-6 - min(S)
        S = S + delta
    Sinv = sp.diag(1. / S)
    logdet = np.sum(np.log(S))
    return sp.dot(U, sp.dot(Sinv, Vh)), logdet
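# Hedged sanity check (added, not from the original source): for a symmetric
# positive definite matrix well away from the 1e-6 singular-value floor, the
# SVD-based inverse and log-determinant should match numpy's direct results.
def _demo_invx():
    import numpy as np
    rng = np.random.RandomState(1)
    G = rng.randn(5, 5)
    X = G.dot(G.T) + 5 * np.eye(5)   # comfortably away from the clipping floor
    Xi, logdet = invx(X)
    assert np.allclose(Xi, np.linalg.inv(X))
    assert np.isclose(logdet, np.linalg.slogdet(X)[1])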
def test_real_lu(self):
    """Getting factors of real matrix"""
    umfpack = um.UmfpackContext("di")
    for A in self.real_matrices:
        umfpack.numeric(A)
        (L, U, P, Q, R, do_recip) = umfpack.lu(A)
        L = L.todense()
        U = U.todense()
        A = A.todense()
        if not do_recip:
            R = 1.0 / R
        R = matrix(diag(R))
        P = eye(A.shape[0])[P, :]
        Q = eye(A.shape[1])[:, Q]
        assert_array_almost_equal(P * R * A * Q, L * U)
def _LML_covar(self, hyperparams):
    """
    log marginal likelihood
    """
    try:
        KV = self.get_covariances(hyperparams)
    except LA.LinAlgError:
        LG.error('linalg exception in _LML_covar')
        return 1E6

    alpha = KV['alpha']
    L = KV['L']

    lml_quad = 0.5 * (alpha * self.Y).sum()
    lml_det = self.t * SP.log(SP.diag(L)).sum()
    lml_const = 0.5 * self.n * self.t * SP.log(2 * SP.pi)
    LML = lml_quad + lml_det + lml_const
    return LML
def amuse(X, Y, evs=5):
    '''
    AMUSE implementation of TICA, see TICA documentation.

    :return: eigenvalues d and corresponding eigenvectors Phi containing the
             coefficients for the eigenfunctions
    '''
    U, s, _ = _sp.linalg.svd(X, full_matrices=False)
    S_inv = _sp.diag(1 / s)
    Xp = S_inv @ U.T @ X
    Yp = S_inv @ U.T @ Y
    K = Xp @ Yp.T
    d, W = sortEig(K, evs)
    Phi = U @ S_inv @ W

    # normalize eigenvectors
    for i in range(Phi.shape[1]):
        Phi[:, i] /= _sp.linalg.norm(Phi[:, i])
    return d, Phi
def K(self, logtheta, x1, x2=None):
    if x2 is None:
        x2 = x1
    # 2. exponentiate params:
    # L = SP.exp(2*logtheta[0:self.n_dimensions])
    # RV = SP.zeros([x1.shape[0],x2.shape[0]])
    # for i in xrange(self.n_dimensions):
    #     iid = self.dimension_indices[i]
    #     RV += L[i]*SP.dot(x1[:,iid:iid+1],x2[:,iid:iid+1].T)
    if self.n_dimensions > 0:
        M = SP.diag(SP.exp(2 * logtheta[0:self.n_dimensions]))
        RV = SP.dot(SP.dot(x1[:, self.dimension_indices], M), x2[:, self.dimension_indices].T)
    else:
        RV = SP.zeros([x1.shape[0], x2.shape[0]])
    return RV
def LML(self, params=None):
    """
    evaluates the log marginal likelihood for the given hyperparameters
    """
    if params is not None:
        self.setParams(params)

    KV = self._update_cache()
    alpha = KV['alpha']
    L = KV['L']

    lml_quad = 0.5 * (alpha * self.Y).sum()
    lml_det = self.t * SP.log(SP.diag(L)).sum()
    lml_const = 0.5 * self.n * self.t * SP.log(2 * SP.pi)
    LML = lml_quad + lml_det + lml_const
    return LML
def local_minimization(par, par_indexes, par_fixed, data, verbose=True):
    """
    Minimize the residuals using the Levenberg-Marquardt algorithm.
    """
    func = chi2.make_calc_residuals(verbose=verbose)
    args = (par_indexes, par_fixed, data)

    try:
        out = opt.leastsq(func, par, args=args,
                          full_output=True,
                          ftol=1e-9, xtol=1e-9,
                          maxfev=100000,
                          epsfcn=1e-10,
                          factor=0.1)
    except TypeError:
        sys.stderr.write(' -- Error encountered during minimization:\n')
        sys.stderr.write(' ----> {:s}\n'.format(sys.exc_info()[1]))
        sys.stderr.write(' -- Check that all parameters are correctly initialized.\n')
        writing.dump_parameters(par, par_indexes, par_fixed, data)
        exit()

    par, pcov, _infodict, errmsg, ier = out

    if ier not in [1, 2, 3, 4]:
        print(''.join(('Optimal parameters not found: ', errmsg)))

    data_nb, par_nb = len(data), len(par)
    reduced_chi2 = chi2.calc_reduced_chi2(par, par_indexes, par_fixed, data)

    if (data_nb > par_nb) and pcov is not None:
        pcov = pcov * reduced_chi2
        par_err = sp.sqrt(sp.diag(pcov))
    else:
        par_err = par

    return par, par_err, reduced_chi2
def ElasticRod(n, dtype=np.dtype("d")):
    # Fixed-free elastic rod
    L = 1.0
    le = L/n
    rho = 7.85e3
    S = 1.e-4
    E = 2.1e11
    mass = rho*S*le/6.
    k = E*S/le
    c = math.sqrt(k)
    A = c*(diag(r_[2.*ones(n-1, dtype=dtype), 1])
           - diag(ones(n-1, dtype=dtype), 1)
           - diag(ones(n-1, dtype=dtype), -1))
    B = mass/c*(diag(r_[4.*ones(n-1, dtype=dtype), 2])
                + diag(ones(n-1, dtype=dtype), 1)
                + diag(ones(n-1, dtype=dtype), -1))
    return A, B
def __init__(self, panels, variable_names=None):
    '''
    A panel series is a time series of panels. Generally, it should not be
    constructed directly but rather from a builder method such as
    PanelSeries.from_csv

    :param panels:
    :param variable_names: names for the variables in the series. The first
        two names are the names of time and group vars.
    '''
    # if the time index is numeric, need to convert to float so that data will be properly sorted
    if all([is_numeric(panel.time) for panel in panels]):
        for panel in panels:
            panel.time = float(panel.time)

    # sort panel by time variable
    self.data = sorted([(panel.time, panel) for panel in panels], key=lambda x: x[0])
    self.times = [time for time, panel in self.data]
    self.variable_names = variable_names

    # verify that we have balanced individuals
    groups = [group.name for group in self.data[0][1].data]
    if not all([groups == [group.name for group in panel.data]
                for (_, panel) in self.data]):
        raise ValueError(
            "Currently, all panels must have the same members in the same order.")
    self.groups = groups

    #
    # compute group masks and check variables
    #
    group_counts_mask = []
    _, n_vars = self.data[0][1].data[0].data.shape
    self.n_variables = n_vars
    for (time, panel) in self.data:
        group_sizes = [group.size for group in panel.data]
        var_counts = [group.n_vars for group in panel.data]
        if not all([v == n_vars for v in var_counts]):
            raise ValueError(
                "Must have same number of variables for each individual!")
        group_counts_mask.append(sp.diag(group_sizes))
    self.group_counts_mask = group_counts_mask
def test_lowrank_ard(self):
    theta = SP.array(SP.random.randn(1 + self.n_train)**2)
    theta_hat = SP.exp(2 * theta)

    _K = theta_hat[0] * SP.dot(self.Xtrain, self.Xtrain.T) + SP.diag(theta_hat[1:])
    _Kcross = theta_hat[0] * SP.dot(self.Xtrain, self.Xtest.T)
    _Kgrad_theta = 2 * theta_hat[0] * SP.dot(self.Xtrain, self.Xtrain.T)

    cov = lowrank.LowRankArdCF(n_dimensions=self.n_dimensions,
                               n_hyperparameters=self.n_train + 1)
    cov.X = self.Xtrain
    cov.Xcross = self.Xtest

    K = cov.K(theta)
    Kcross = cov.Kcross(theta)

    assert SP.allclose(K, _K), 'ouch, covariance matrix is wrong'
    assert SP.allclose(Kcross, _Kcross), 'ouch, cross covariance matrix is wrong'
    assert SP.allclose(_Kgrad_theta, cov.Kgrad_theta(theta, 0)), \
        'ouch gradient with respect to theta[0] is wrong'

    # gradient with respect to parameters of the diagonal matrix
    for i in range(self.n_train):
        Kgrad_theta = cov.Kgrad_theta(theta, i + 1)
        _Kgrad_theta = SP.zeros(Kgrad_theta.shape)
        _Kgrad_theta[i, i] = 2 * theta_hat[i + 1]
        assert SP.allclose(Kgrad_theta, _Kgrad_theta), \
            'ouch gradient with respect to theta[%d] is wrong' % (i + 1)

    # gradient with respect to latent factors
    for i in range(self.n_dimensions):
        for j in range(self.n_train):
            Xgrad = SP.zeros(self.Xtrain.shape)
            Xgrad[j, i] = 1
            _Kgrad_x = theta_hat[0] * (SP.dot(Xgrad, self.Xtrain.T) + SP.dot(self.Xtrain, Xgrad.T))
            Kgrad_x = cov.Kgrad_x(theta, i, j)
            assert SP.allclose(Kgrad_x, _Kgrad_x), \
                'ouch, gradient with respect to x is wrong for entry [%d,%d]' % (i, j)
def gap_stats0(data, refs=None, nrefs=20, ks=range(1, 20)):
    """
    Compute the Gap statistic for an nxm dataset in data.

    Either give a precomputed set of reference distributions in refs as an
    (n,m,k) scipy array, or state the number k of reference distributions in
    nrefs for automatic generation with a uniform distribution within the
    bounding box of data.

    Give the list of k-values for which you want to compute the statistic in ks.

    Reference
    ---------
    https://gist.github.com/michiexile/5635273#file-gap-py
    """
    import scipy
    import scipy.cluster.vq
    import scipy.spatial.distance
    dst = scipy.spatial.distance.euclidean

    shape = data.shape
    if refs is None:
        tops = data.max(axis=0)
        bots = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops - bots))
        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i] * dists + bots
    else:
        rands = refs

    gaps = scipy.zeros((len(ks),))
    for (i, k) in enumerate(ks):
        (kmc, kml) = scipy.cluster.vq.kmeans2(data, k)
        disp = sum([dst(data[m, :], kmc[kml[m], :]) for m in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))
        for j in range(rands.shape[2]):
            (kmc, kml) = scipy.cluster.vq.kmeans2(rands[:, :, j], k)
            refdisps[j] = sum([dst(rands[m, :, j], kmc[kml[m], :]) for m in range(shape[0])])

        # gaps[i] = scipy.log(scipy.mean(refdisps))-scipy.log(disp)
        gaps[i] = scipy.mean(scipy.log(refdisps)) - scipy.log(disp)

    return gaps
def test_complex_int64_lu(self):
    # Getting factors of complex matrix with long indices
    umfpack = um.UmfpackContext("zl")
    for A in self.complex_int64_matrices:
        umfpack.numeric(A)
        (L, U, P, Q, R, do_recip) = umfpack.lu(A)
        L = L.todense()
        U = U.todense()
        A = A.todense()
        if not do_recip:
            R = 1.0 / R
        R = matrix(diag(R))
        P = eye(A.shape[0])[P, :]
        Q = eye(A.shape[1])[:, Q]
        assert_array_almost_equal(P * R * A * Q, L * U)
def compute_linear_kernel(X, idx=None, jitter=1e-3, standardize=True):
    """
    compute linear kernel

    X : SNP data [N x F]
    idx : boolean vector of size F, indicating which SNPs to use to build
        the covariance matrix
    standardize : if True (default), covariance matrix is standardized to
        unit variance
    jitter : adds jitter to the diagonal of the covariance matrix
        (default: 1e-3)
    """
    N = X.shape[0]
    if idx is not None:
        K = sp.dot(X[:, idx], X[:, idx].T)
    else:
        K = sp.dot(X, X.T)
    if standardize:
        K /= sp.diag(K).mean()
    if jitter:
        K += jitter * sp.eye(N)
    return K
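# Hedged usage sketch (added, not from the original source): building a
# standardized linear kernel from toy genotype-like data; after
# standardization plus jitter the mean of the diagonal is exactly 1 + jitter.
def _demo_compute_linear_kernel():
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.binomial(2, 0.3, size=(20, 100)).astype(float)  # toy SNP matrix
    K = compute_linear_kernel(X)
    assert K.shape == (20, 20)
    assert np.isclose(np.diag(K).mean(), 1.0 + 1e-3)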
def gap(data, refs=None, nrefs=20, ks=range(1, 11)):
    """
    I: NumPy array, reference matrix, number of reference boxes, number of clusters to test
    O: Gaps NumPy array, Ks input list

    Give the list of k-values for which you want to compute the statistic in ks.
    By Gap Statistic from Tibshirani, Walther.
    """
    shape = data.shape

    if refs is None:
        tops = data.max(axis=0)
        bottoms = data.min(axis=0)
        dists = scipy.matrix(scipy.diag(tops - bottoms))
        rands = scipy.random.random_sample(size=(shape[0], shape[1], nrefs))
        for i in range(nrefs):
            rands[:, :, i] = rands[:, :, i] * dists + bottoms
    else:
        rands = refs

    gaps = scipy.zeros((len(ks),))

    for (i, k) in enumerate(ks):
        k_means_args_dict['n_clusters'] = k
        kmeans = k_means(**k_means_args_dict)
        kmeans.fit(data)
        (cluster_centers, point_labels) = kmeans.cluster_centers_, kmeans.labels_

        disp = sum([dst(data[current_row_index, :],
                        cluster_centers[point_labels[current_row_index], :])
                    for current_row_index in range(shape[0])])

        refdisps = scipy.zeros((rands.shape[2],))

        for j in range(rands.shape[2]):
            kmeans = k_means(**k_means_args_dict)
            kmeans.fit(rands[:, :, j])
            (cluster_centers, point_labels) = kmeans.cluster_centers_, kmeans.labels_
            refdisps[j] = sum([dst(rands[current_row_index, :, j],
                                   cluster_centers[point_labels[current_row_index], :])
                               for current_row_index in range(shape[0])])

        # let i be the index into the array 'gaps' for this k
        gaps[i] = scipy.mean(scipy.log(refdisps)) - scipy.log(disp)

    return gaps
def predict(self, hyperparams, Xstar=None, Xstar_r=None, **kwargs):
    """
    predict on Xstar
    """
    if Xstar_r is not None:
        Xstar = Xstar_r
    if Xstar is not None:
        self.covar.Xcross = Xstar

    KV = self.get_covariances(hyperparams)
    Kstar = self.covar.Kcross(hyperparams['covar'])
    Ystar = SP.dot(Kstar.T, KV['alpha'])

    # Computing the prediction covariance (should be double checked)
    R_star_star = SP.exp(2 * hyperparams['covar']) * SP.dot(Xstar, Xstar.T)
    v = LA.cho_solve((KV['L'], True), Kstar)
    Ystar_cov = R_star_star - Kstar.T.dot(v)

    return Ystar.flatten(), SP.diag(Ystar_cov)
def spectral_decomp(tradeDate, C, rtns, I=10000):
    '''Singular value decomposition, C = covariance or correlation matrix.'''
    hold_prd = 1
    C = C.loc[tradeDate, :, :]  # covariance panel
    [S, V, D] = svd(C)          # singular value decomposition
    A = np.dot(S, np.sqrt(diag(V)))
    z = np.random.randn(I, len(C))  # random normals to be correlated through spectral decomp
    dr = hold_prd * (rtns.iloc[-1, :]) / yr_basis
    # correlated random normals
    X = np.dot(z, A.T)
    X = pd.DataFrame(X, columns=C.columns)
    dQ = X + dr
    return dQ
def polymom_all_constraints(self, maxdeg):
    d = self.d
    xis = self.sym_means
    covs = self.sym_covs
    exprs = self.polymom_all_expressions(maxdeg)
    import mompy as mp
    meas = mp.Measure(xis + covs)
    for i in range(self.k):
        means, covs = self.means.T[i], sc.diag(self.sigmas[i])
        meas += (self.weights[i], means.tolist() + covs.tolist())
    meas.normalize()

    for i, expr in enumerate(exprs):
        exprval = meas.integrate(expr)
        exprs[i] = expr - exprval
    return exprs
def ElasticRod(n):
    """Build the matrices for the generalized eigenvalue problem of the
    fixed-free elastic rod vibration model.
    """
    L = 1.0
    le = L/n
    rho = 7.85e3
    S = 1.e-4
    E = 2.1e11
    mass = rho*S*le/6.
    k = E*S/le
    A = k*(diag(r_[2.*ones(n-1), 1])-diag(ones(n-1), 1)-diag(ones(n-1), -1))
    B = mass*(diag(r_[4.*ones(n-1), 2])+diag(ones(n-1), 1)+diag(ones(n-1), -1))
    return A, B
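# Hedged check (added, not from the original source): for the fixed-free rod
# pencil (A, B) the generalized eigenvalues are squared natural frequencies,
# hence strictly positive.
def _demo_elastic_rod(n=8):
    import numpy as np
    from scipy.linalg import eigh
    A, B = ElasticRod(n)
    w = eigh(A, B, eigvals_only=True)
    assert (w > 0).all()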
def __getitem__(self, key):
    #print key
    xslice, yslice, zslice = key

    # cut region out of data stack
    dataROI = self.data[xslice, yslice, zslice]

    # average in z
    dataMean = dataROI.mean(2)

    # generate grid to evaluate function on
    X, Y = scipy.mgrid[xslice, yslice].astype('d')

    # estimate some start parameters...
    A = dataMean.max() - dataMean.min()  # amplitude
    x0 = (xslice.start + xslice.stop - 1) / 2
    y0 = (yslice.start + yslice.stop - 1) / 2

    startParameters = [A, x0, y0, 1.7, dataMean.min(), .001, .001]

    #print dataMean.shape
    #print X.shape

    # estimate errors in data
    nSlices = dataROI.shape[2]
    #sigma = (4 + scipy.sqrt(dataMean))/sqrt(nSlices)
    sigma = scipy.sqrt(self.ccdReadNoise**2 + (self.noiseFactor**2) * self.electronsPerCount * dataMean / nSlices) / self.electronsPerCount

    # do the fit
    #(res, resCode) = FitModel(f_gauss2d, startParameters, dataMean, X, Y)
    (res, cov_x, infodict, mesg, resCode) = FitModelWeighted(f_gauss2d, startParameters, dataMean, sigma, X, Y)
    #print cov_x
    #print infodict['fjac']
    #print mesg
    #print resCode
    #return GaussianFitResult(res, self.metadata, (xslice, yslice, zslice), resCode)

    fitErrors = None
    try:
        fitErrors = scipy.sqrt(scipy.diag(cov_x) * (infodict['fvec'] * infodict['fvec']).sum() / (len(dataMean.ravel()) - len(res)))
    except Exception as e:
        pass

    return GaussianFitResult(res, self.metadata, (xslice, yslice, zslice), resCode, fitErrors)
def outlier_removed_fit(m, w=None, n_iter=10, polyord=7):
    """
    Remove outliers using fitted data.

    Args:
        m (:obj:`numpy array`): Phase curve.
        w (:obj:`numpy array`, optional): Weights for each sample.
        n_iter (:obj:`int`): Number of outlier-removal iterations.
        polyord (:obj:`int`): Order of polynomial used.

    Returns:
        fit (:obj:`numpy array`): Curve with outliers removed
    """
    if w is None:
        w = sp.ones_like(m)
    W = sp.diag(sp.sqrt(w))
    m2 = sp.copy(m)
    tv = sp.linspace(-1, 1, num=len(m))
    A = sp.zeros([len(m), polyord])
    for j in range(polyord):
        A[:, j] = tv**(float(j))
    A2 = sp.dot(W, A)
    m2w = sp.dot(m2, W)
    fit = None
    for i in range(n_iter):
        xhat = sp.linalg.lstsq(A2, m2w)[0]
        fit = sp.dot(A, xhat)
        # use gradient for central finite differences which keeps order
        resid = sp.gradient(fit - m2)
        std = sp.std(resid)
        bidx = sp.where(sp.absolute(resid) > 2.0 * std)[0]
        for bi in bidx:
            A2[bi, :] = 0.0
            m2[bi] = 0.0
            m2w[bi] = 0.0
    if debug_plot:
        plt.plot(m2, label="outlier removed")
        plt.plot(m, label="original")
        plt.plot(fit, label="fit")
        plt.legend()
        plt.ylim([fit.min() - std * 3.0, fit.max() + std * 3.0])
        plt.show()
    return fit
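# Hedged usage sketch (added, not from the original source): a smooth curve
# with one gross outlier; the iteratively reweighted polynomial fit should
# track the clean curve. Assumes debug_plot is a module-level flag left
# False, as the function body implies.
def _demo_outlier_removed_fit():
    import numpy as np
    t = np.linspace(-1, 1, 200)
    clean = 3 * t**2 - t
    noisy = clean + 0.01 * np.random.RandomState(0).randn(200)
    noisy[50] += 10.0                  # single gross outlier
    fit = outlier_removed_fit(noisy)
    assert np.abs(fit - clean).max() < 0.5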
def solve_mixture_model(model, data):
    """
    Whiten and unwhiten appropriately
    """
    d = model["d"]

    # Get moments
    moments = model.empirical_moments(data, model.observed_monomials(3))
    M2 = zeros((d, d))
    M3 = zeros((d, d, d))
    for i in xrange(d):
        for j in xrange(d):
            xij = sp.sympify('x%d * x%d' % (i + 1, j + 1))
            M2[i, j] = moments[xij]
            for k in xrange(d):
                xijk = sp.sympify('x%d * x%d * x%d' % (i + 1, j + 1, k + 1))
                M3[i, j, k] = moments[xijk]

    k = model["k"]

    # Symmetrize
    M2, M3 = symmetrize(M2), symmetrize(M3)
    assert symmetric_skew(M2) < 1e-2
    assert symmetric_skew(M3) < 1e-2

    # Whiten
    W, Wt = get_whitener(M2, k)
    M3_ = einsum('ijk,ia,jb,kc->abc', M3, W, W, W)

    pi_, M_, _, _ = candecomp(M3_, k)

    # Unwhiten M
    M_ = Wt.dot(M_.dot(diag(pi_)))
    pi_ = 1. / pi_**2

    # "Project" onto simplex
    pi_ = make_distribution(abs(pi_))
    M_ = array([make_distribution(col) for col in M_.T]).T

    return pi_, M_
def GetWHIM12(CoordinateMatrix, AtomLabel, proname='u'):
    """
    #################################################################
    WHIM descriptors

    --->E3u
    #################################################################
    """
    nAtom, kc = CoordinateMatrix.shape

    if proname == 'u':
        weight = scipy.matrix(scipy.eye(nAtom))
    else:
        weight = GetPropertyMatrix(AtomLabel, proname)

    S = XPreCenter(CoordinateMatrix)
    u, s, v = scipy.linalg.svd(S.T * weight * S / sum(scipy.diag(weight)))
    res = scipy.power(s[2], 2) * nAtom / sum(scipy.power(S * scipy.matrix(u[:, 2]).T, 4))

    return round(float(res.real), 3)
def compute_confusion_matrix(self, yp, yr):
    '''
    Compute the confusion matrix
    '''
    # Initialization
    n = yp.size
    C = int(yr.max())
    self.confusion_matrix = sp.zeros((C, C))

    # Compute confusion matrix
    for i in range(n):
        self.confusion_matrix[yp[i].astype(int) - 1, yr[i].astype(int) - 1] += 1

    # Compute overall accuracy
    self.OA = sp.sum(sp.diag(self.confusion_matrix)) / n

    # Compute Kappa
    nl = sp.sum(self.confusion_matrix, axis=1)
    nc = sp.sum(self.confusion_matrix, axis=0)
    self.Kappa = ((n**2) * self.OA - sp.sum(nc * nl)) / (n**2 - sp.sum(nc * nl))
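# Hedged usage sketch (added, not from the original source): labels are
# 1-based integers, as the "-1" indexing above implies. The minimal stand-in
# class is only for the demo; in the source this method lives on a
# classifier/score object.
def _demo_compute_confusion_matrix():
    import numpy as np
    class _Scorer(object):
        compute_confusion_matrix = compute_confusion_matrix
    s = _Scorer()
    yr = np.array([1, 1, 2, 2, 3, 3])   # reference labels
    yp = np.array([1, 2, 2, 2, 3, 1])   # predicted labels
    s.compute_confusion_matrix(yp, yr)
    assert s.confusion_matrix.shape == (3, 3)
    assert abs(s.OA - 4.0 / 6.0) < 1e-12   # 4 of 6 predictions correct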
def LearnGraphPredictors(L, X, H, lamb, gamma):
    # Learning the Graph (or Hypergraph)-based attribute predictors
    print('------------------------------------------------------------------------')
    print('Learning the Graph (or Hypergraph)-based attribute predictors')

    # Generate the label matrix using the edge-vertex incidence matrix
    L = sparse.csc_matrix(L)
    X = sparse.csc_matrix(X)
    Y = 2.0 * H - 1.0
    Y = sparse.csc_matrix(Y)

    TempMat = scipy.dot(scipy.dot(X, L), X.T) + lamb * scipy.dot(X, X.T)
    local = scipy.mean(scipy.diag(TempMat.toarray()))

    # Learn the predictors by solving the problem as a least-squares problem
    (row, col) = TempMat.shape
    I = (TempMat + local * gamma * scipy.eye(row, col)).I
    I = sparse.csc_matrix(I)
    P = scipy.dot(I, lamb * scipy.dot(X, Y))
    return P
def LMLdebug(self):
    """
    LML function for debug
    """
    assert self.N * self.P < 2000, 'gp2kronSum:: N*P>=2000'

    y = SP.reshape(self.Y, (self.N * self.P), order='F')

    K = SP.kron(self.Cg.K(), self.XX)
    K += SP.kron(self.Cn.K() + self.offset * SP.eye(self.P), SP.eye(self.N))

    cholK = LA.cholesky(K)
    Kiy = LA.cho_solve((cholK, False), y)

    lml = y.shape[0] * SP.log(2 * SP.pi)
    lml += 2 * SP.log(SP.diag(cholK)).sum()
    lml += SP.dot(y, Kiy)
    lml *= 0.5

    return lml
def annopred_inf(beta_hats, pr_sigi, n=1000, reference_ld_mats=None, ld_window_size=100):
    """
    infinitesimal model with snp-specific heritability derived from annotation
    used as the initial values for MCMC of non-infinitesimal model
    """
    num_betas = len(beta_hats)
    updated_betas = sp.empty(num_betas)
    m = len(beta_hats)

    for i, wi in enumerate(range(0, num_betas, ld_window_size)):
        start_i = wi
        stop_i = min(num_betas, wi + ld_window_size)
        curr_window_size = stop_i - start_i
        Li = 1.0 / pr_sigi[start_i:stop_i]
        D = reference_ld_mats[i]
        A = (n / (1)) * D + sp.diag(Li)
        A_inv = linalg.pinv(A)
        # Adjust the beta_hats
        updated_betas[start_i:stop_i] = sp.dot(A_inv / (1.0 / n), beta_hats[start_i:stop_i])

    return updated_betas
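# Hedged usage sketch (added, not from the original source): with a single
# identity LD window, the update reduces analytically to
# beta_hat * n / (n + 1/sigma_i), which this checks.
def _demo_annopred_inf():
    import numpy as np
    m = 10
    beta_hats = np.full(m, 0.1)
    pr_sigi = np.full(m, 0.01)       # per-SNP heritability, so 1/sigma_i = 100
    out = annopred_inf(beta_hats, pr_sigi, n=1000,
                       reference_ld_mats=[np.eye(m)], ld_window_size=m)
    assert np.allclose(out, 0.1 * 1000 / (1000 + 100.0))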