def get_4_squares(parent1, parent2):
    n_folds = 2
    levels1 = np.unique(parent1)
    levels2 = np.unique(parent2)
    N1 = len(levels1)
    N2 = len(levels2)
    r1 = sp.random.permutation(N1)
    r2 = sp.random.permutation(N2)
    Icv1 = sp.floor(((sp.ones((N1)) * n_folds) * r1) / N1)
    Icv2 = sp.floor(((sp.ones((N2)) * n_folds) * r2) / N2)
    train_parents1 = levels1[Icv1 != 0]
    train_parents2 = levels2[Icv2 != 0]
    test_parents1 = levels1[Icv1 == 0]
    test_parents2 = levels2[Icv2 == 0]
    train_ind1 = np.array([e in train_parents1 for e in parent1], dtype=bool)
    train_ind2 = np.array([e in train_parents2 for e in parent2], dtype=bool)
    test_ind1 = np.array([e in test_parents1 for e in parent1], dtype=bool)
    test_ind2 = np.array([e in test_parents2 for e in parent2], dtype=bool)
    Itest = test_ind1 & test_ind2
    Itrain_distant = train_ind1 & train_ind2
    Itrain_close1 = (train_ind1 & test_ind2)
    Itrain_close2 = (train_ind2 & test_ind1)
    Itrain_close = select_subset(Itrain_close1 | Itrain_close2, Itest.sum())
    return Itest, Itrain_distant, Itrain_close1, Itrain_close2, Itrain_close
def __init__(self, renderer=True, realtime=True, ip="127.0.0.1", port="21560"): # initialize base class GraphicalEnvironment.__init__(self) self.actLen=12 self.mySensors=sensors.Sensors(["EdgesReal"]) self.dists=array([20.0, sqrt(2.0)*20, sqrt(3.0)*20]) self.gravVect=array([0.0,-100.0,0.0]) self.centerOfGrav=zeros((1,3),float) self.pos=ones((8,3),float) self.vel=zeros((8,3),float) self.SpringM = ones((8,8),float) self.d=60.0 self.dt=0.02 self.startHight=10.0 self.dumping=0.4 self.fraktMin=0.7 self.fraktMax=1.3 self.minAkt=self.dists[0]*self.fraktMin self.maxAkt=self.dists[0]*self.fraktMax self.reset() self.count=0 self.setEdges() self.act(array([20.0]*12)) self.euler() self.realtime=realtime self.step=0 if renderer: self.setRenderInterface(FlexCubeRenderInterface(ip, port)) self.getRenderInterface().updateData(self.pos, self.centerOfGrav)
def buildBitsLUT(): global lutAsString nEntries=256 contrast=0.5 gamma=1.0 ramp = scipy.arange(-1.0,1.0,2.0/nEntries) ramp = (ramp*contrast+1.0)/2.0 #get into range 0:1 ramp = (ramp**gamma) * 2**16 ramp = ramp.astype(scipy.UnsignedInt16) RGB = scipy.ones((1,nEntries*2,3),scipy.UnsignedInt8) RGB[:, 0::2, 0] = 1#byteMS(ramp)#R RGB[:, 1::2, 0] = 0# byteLS(ramp) RGB[:, 0::2, 1] = 1#byteMS(ramp)#G RGB[:, 1::2, 1] = 0#byteLS(ramp) RGB[:, 0::2, 2] = 1#byteMS(ramp)#B RGB[:, 1::2, 2] = 0#byteLS(ramp) #prepend the bits++ header (precedes LUT) #and create a string version ready for drawing head = scipy.ones((1,12,3),scipy.UnsignedInt8) head[:,:,0] = [ 36, 63, 8, 211, 3, 112, 56, 34,0,0,0,0]#R head[:,:,1] = [ 106, 136, 19, 25, 115, 68, 41, 159,0,0,0,0]#G head[:,:,2] = [ 133, 163, 138, 46, 164, 9, 49, 208,0,0,0,0]#B head[:,:,0] = [ 0, 63, 8, 211, 3, 112, 56, 34,0,0,0,0]#R head[:,:,1] = [ 0, 136, 19, 25, 115, 68, 41, 159,0,0,0,0]#G head[:,:,2] = [ 0, 163, 138, 46, 164, 9, 49, 208,0,0,0,0]#B #head[:,:,0] = [ 255, 255, 0, 0, 0, 0, 56, 34,0,0,0,0]#R #head[:,:,1] = [ 0, 0, 255, 255, 0, 0, 41, 159,0,0,0,0]#G #head[:,:,2] = [ 0, 0, 0, 0, 255, 255, 49, 208,0,0,0,0]#B asArr = scipy.concatenate((head,RGB),1) lutAsString = asArr.tostring()
def test_pore2centroid(self):
    temp_coords = self.net['pore.coords']
    self.geo['pore.centroid'] = sp.ones([self.geo.num_pores(), 3])
    vo.pore2centroid(self.net)
    assert sp.sum(self.net['pore.coords'] -
                  sp.ones([self.geo.num_pores(), 3])) == 0.0
    self.net['pore.coords'] = temp_coords
def calculateGradient(self):
    # normalize rewards
    # self.dataset.data['reward'] /= max(ravel(abs(self.dataset.data['reward'])))

    # initialize variables
    R = ones((self.dataset.getNumSequences(), 1), float)
    X = ones((self.dataset.getNumSequences(), self.loglh.getDimension('loglh') + 1), float)

    # collect sufficient statistics
    print self.dataset.getNumSequences()
    for n in range(self.dataset.getNumSequences()):
        _state, _action, reward = self.dataset.getSequence(n)
        seqidx = ravel(self.dataset['sequence_index'])
        if n == self.dataset.getNumSequences() - 1:
            # last sequence until end of dataset
            loglh = self.loglh['loglh'][seqidx[n]:, :]
        else:
            loglh = self.loglh['loglh'][seqidx[n]:seqidx[n + 1], :]
        X[n, :-1] = sum(loglh, 0)
        R[n, 0] = sum(reward, 0)

    # linear regression
    beta = dot(pinv(X), R)
    return beta[:-1]
def make_block_border_mask(spots, areas):
    """Returns a mask indicating which pixels lie just outside a block of spots"""
    inside = make_inside_mask(spots)
    outside = make_outside_mask(spots, areas)
    very_near_inside = sp.ndimage.binary_dilation(inside, structure=sp.ones((3, 3)), iterations=8)
    near_inside = sp.ndimage.binary_dilation(inside, structure=sp.ones((3, 3)), iterations=32)
    return near_inside & ~very_near_inside & outside
def kalman_filter(b, V, Phi, y, X, sigma, Sigma, switch=0,
                  D=None, d=None, G=None, a=None, c=None):
    r"""
    .. math::
       :nowrap:

       \begin{eqnarray*}
       \beta_{t|t-1} = \Phi \: \beta_{t-1|t-1}\\
       V_{t|t-1} = \Phi V_{t-1|t-1} \Phi^T + \Sigma \\
       e_t = y_t - X_t \beta_{t|t-1}\\
       K_t = V_{t|t-1} X_t^T (\sigma + X_t V_{t|t-1} X_t^T)^{-1}\\
       \beta_{t|t} = \beta_{t|t-1} + K_t e_t\\
       V_{t|t} = (I - K_t X_t) V_{t|t-1}\\
       \end{eqnarray*}
    """
    n = scipy.shape(X)[1]
    beta = scipy.empty(scipy.shape(X))
    n = len(b)
    if D is None:
        D = scipy.ones((1, n))
    if d is None:
        d = scipy.matrix(1.)
    if G is None:
        G = scipy.identity(n)
    if a is None:
        a = scipy.zeros((n, 1))
    if c is None:
        c = scipy.ones((n, 1))
    # import code; code.interact(local=locals())
    (b, V) = kalman_predict(b, V, Phi, Sigma)
    for i in xrange(len(X)):
        beta[i] = scipy.array(b).T
        (b, V, e, K) = kalman_upd(b, V, y[i], X[i], sigma, Sigma,
                                  switch, D, d, G, a, c)
        (b, V) = kalman_predict(b, V, Phi, Sigma)
    return beta
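# A minimal numpy-style sketch of the unconstrained predict/update recursion stated
# in the docstring above (the switch == 0 case). The kalman_predict / kalman_upd
# helpers used by kalman_filter also handle the constrained case; the names below
# are illustrative only and are not part of that module.
import scipy

def _predict_sketch(b, V, Phi, Sigma):
    # beta_{t|t-1} = Phi beta_{t-1|t-1};  V_{t|t-1} = Phi V Phi^T + Sigma
    b = scipy.dot(Phi, b)
    V = scipy.dot(Phi, scipy.dot(V, Phi.T)) + Sigma
    return b, V

def _update_sketch(b, V, y_t, X_t, sigma):
    # e_t = y_t - X_t beta;  K_t = V X_t^T (sigma + X_t V X_t^T)^{-1}
    X_t = scipy.atleast_2d(X_t)                  # 1 x n row of regressors
    e = y_t - scipy.dot(X_t, b)
    S = sigma + scipy.dot(X_t, scipy.dot(V, X_t.T))
    K = scipy.dot(V, X_t.T) / S
    b = b + K * e                                # beta_{t|t} = beta_{t|t-1} + K_t e_t
    V = V - scipy.dot(K, scipy.dot(X_t, V))      # V_{t|t} = (I - K_t X_t) V_{t|t-1}
    return b, V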
def learn(self, X, t, tol=0.01, amax=1e10):
    u"""Fit the model (training)."""
    N = X.shape[0]
    a = sp.ones(N + 1)  # hyperparameter
    b = 1.0
    phi = sp.ones((N, N + 1))  # design matrix
    phi[:, 1:] = [[self._kernel(xi, xj) for xj in X] for xi in X]
    diff = 1
    while diff >= tol:
        sigma = spla.inv(sp.diag(a) + b * sp.dot(phi.T, phi))
        m = b * sp.dot(sigma, sp.dot(phi.T, t))
        gamma = sp.ones(N + 1) - a * sigma.diagonal()
        anew = gamma / (m * m)
        bnew = (N - gamma.sum()) / sp.square(spla.norm(t - sp.dot(phi, m)))
        anew[anew >= amax] = amax
        adiff, bdiff = anew - a, bnew - b
        diff = (adiff * adiff).sum() + bdiff * bdiff
        a, b = anew, bnew
        print ".",
    self._a = a
    self._b = b
    self._X = X
    self._m = m
    self._sigma = sigma
    self._amax = amax
def phenSpecificEffects(snps, pheno1, pheno2, K=None, covs=None, test='lrt'):
    """
    Univariate fixed effects interaction test for phenotype specific SNP effects

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno1: [N x 1] SP.array of 1 phenotype for N individuals
        pheno2: [N x 1] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                If not provided, then linear regression analysis is performed
        covs:   [N x D] SP.array of D covariates for N individuals
        test:   'lrt' for likelihood ratio test (default) or 'f' for F-test

    Returns:
        limix LMM object
    """
    N = snps.shape[0]
    if K is None:
        K = SP.eye(N)
    assert (pheno1.shape[1] == pheno2.shape[1]), "Only consider equal number of phenotype dimensions"
    if covs is None:
        covs = SP.ones((N, 1))
    assert (pheno1.shape[1] == 1 and pheno2.shape[1] == 1 and pheno1.shape[0] == N and
            pheno2.shape[0] == N and K.shape[0] == N and K.shape[1] == N and
            covs.shape[0] == N), "shapes mismatch"
    Inter = SP.zeros((N * 2, 1))
    Inter[0:N, 0] = 1
    Inter0 = SP.ones((N * 2, 1))
    Yinter = SP.concatenate((pheno1, pheno2), 0)
    Xinter = SP.tile(snps, (2, 1))
    Covinter = SP.tile(covs, (2, 1))
    lm = simple_interaction(snps=Xinter, pheno=Yinter, covs=Covinter,
                            Inter=Inter, Inter0=Inter0, test=test)
    return lm
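# A minimal usage sketch for phenSpecificEffects, assuming limix's
# simple_interaction is importable in this module; the toy genotypes,
# phenotypes, and kinship below are illustrative only.
import scipy as SP

N, S = 200, 10
snps = (SP.random.rand(N, S) < 0.5).astype(float)   # toy 0/1 genotypes
pheno1 = SP.random.randn(N, 1)                      # first phenotype
pheno2 = SP.random.randn(N, 1)                      # second phenotype
K = SP.dot(snps, snps.T) / S                        # crude kinship estimate
lm = phenSpecificEffects(snps, pheno1, pheno2, K=K, test='lrt')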
def sqcover(A, n):
    edge = sp.sqrt(A)   # the length of an edge
    d = edge / n        # the distance between two adjacent points
    r = d / 2           # the "radius of "
    end = edge - r      # end point
    base = sp.linspace(r, end, n)
    first_line = sp.transpose(sp.vstack((base, r * sp.ones(n))))
    increment = sp.transpose(sp.vstack((sp.zeros(n), d * sp.ones(n))))
    pts = first_line
    y_diff = increment
    for i in range(n - 1):
        pts = sp.vstack((pts, first_line + y_diff))
        y_diff = y_diff + increment

    # Color matter
    colors = []
    for p in pts:
        cval = n * p[0] + p[1]  # the x-coord has a higher weight
        cval = colormap.Spectral(cval / ((n + 1) * end))  # normalize by the max value that cval can take.
        colors.append(cval)
    colors = sp.array(colors)

    cover = (pts, r, colors)
    return cover
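# A small usage sketch for sqcover, assuming `sp` is scipy/numpy and `colormap`
# refers to a matplotlib colormap module (e.g. `from matplotlib import cm as colormap`).
pts, r, colors = sqcover(A=4.0, n=5)   # cover a square of area 4 with a 5x5 grid
print(pts.shape)                       # (25, 2): grid point coordinates
print(r)                               # 0.2: half the spacing between adjacent points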
def createLargeSubMatrix():
    # Create a large matrix, but with same amount of 'ones' as the small submatrix
    t1 = time.time()
    m = 40000
    n = 1000000
    M = sparse.lil_matrix((m, n))

    # dimensions of the small submatrix used for populating entries
    m = 500
    n = 20000

    # Populate some of the matrix
    M[0, :] = ones(n)
    M[:, 0] = 1
    M[(m / 2), :] = ones(n)
    M[:, (n / 2)] = 1
    M[(m - 1), :] = ones(n)
    M[:, (n - 1)] = 1

    t2 = time.time()
    print 'Time used: ', (t2 - t1)
    return M
def __init__(self, render=True, realtime=True, ip="127.0.0.1", port="21560"): # initialize base class self.render = render if self.render: self.updateDone = True self.updateLock = threading.Lock() self.server = UDPServer(ip, port) self.actLen = 12 self.mySensors = sensors.Sensors(["EdgesReal"]) self.dists = array([20.0, sqrt(2.0) * 20, sqrt(3.0) * 20]) self.gravVect = array([0.0, -100.0, 0.0]) self.centerOfGrav = zeros((1, 3), float) self.pos = ones((8, 3), float) self.vel = zeros((8, 3), float) self.SpringM = ones((8, 8), float) self.d = 60.0 self.dt = 0.02 self.startHight = 10.0 self.dumping = 0.4 self.fraktMin = 0.7 self.fraktMax = 1.3 self.minAkt = self.dists[0] * self.fraktMin self.maxAkt = self.dists[0] * self.fraktMax self.reset() self.count = 0 self.setEdges() self.act(array([20.0] * 12)) self.euler() self.realtime = realtime self.step = 0
def _additionalInit(self): assert self.numberOfCenters == 1, 'Mixtures of Gaussians not supported yet.' xdim = self.numParameters self.alphas = ones(self.numberOfCenters) / float(self.numberOfCenters) self.mus = [] self.sigmas = [] if self.rangemins == None: self.rangemins = -ones(xdim) if self.rangemaxs == None: self.rangemaxs = ones(xdim) if self.initCovariances == None: if self.diagonalOnly: self.initCovariances = ones(xdim) else: self.initCovariances = eye(xdim) for _ in range(self.numberOfCenters): self.mus.append(rand(xdim) * (self.rangemaxs - self.rangemins) + self.rangemins) self.sigmas.append(dot(eye(xdim), self.initCovariances)) self.samples = list(range(self.windowSize)) self.fitnesses = zeros(self.windowSize) self.generation = 0 self.allsamples = [] self.muevals = [] self.allmus = [] self.allsigmas = [] self.allalphas = [] self.allUpdateSizes = [] self.allfitnesses = [] self.meanShifts = [zeros((self.numParameters)) for _ in range(self.numberOfCenters)] self._oneEvaluation(self._initEvaluable)
def svm_gradient_batch_fast(X_pred, X_exp, y, X_pred_ids, X_exp_ids, w, C=.0001, sigma=1.): # sample Kernel rnpred = X_pred_ids#sp.random.randint(low=0,high=len(y),size=n_pred_samples) rnexpand = X_exp_ids#sp.random.randint(low=0,high=len(y),size=n_expand_samples) #K = GaussKernMini_fast(X_pred.T,X_exp.T,sigma) X1 = X_pred.T X2 = X_exp.T if sp.sparse.issparse(X1): G = sp.outer(X1.multiply(X1).sum(axis=0), sp.ones(X2.shape[1])) else: G = sp.outer((X1 * X1).sum(axis=0), sp.ones(X2.shape[1])) if sp.sparse.issparse(X2): H = sp.outer(X2.multiply(X2).sum(axis=0), sp.ones(X1.shape[1])) else: H = sp.outer((X2 * X2).sum(axis=0), sp.ones(X1.shape[1])) K = sp.exp(-(G + H.T - 2. * fast_dot(X1.T, X2)) / (2. * sigma ** 2)) # K = sp.exp(-(G + H.T - 2.*(X1.T.dot(X2)))/(2.*sigma**2)) if sp.sparse.issparse(X1) | sp.sparse.issparse(X2): K = sp.array(K) # compute predictions yhat = fast_dot(K,w[rnexpand]) # compute whether or not prediction is in margin inmargin = (yhat * y[rnpred]) <= 1 # compute gradient G = C * w[rnexpand] - fast_dot((y[rnpred] * inmargin), K) return G,rnexpand
def estimateBeta(X,Y,K,C=None,addBiasTerm=False,numintervals0=100,ldeltamin0=-5.0,ldeltamax0=5.0): """ compute all pvalues If numintervalsAlt==0 use EMMA-X trick (keep delta fixed over alternative models) """ n,s=X.shape; n_pheno=Y.shape[1]; S,U=LA.eigh(K); UY=SP.dot(U.T,Y); UX=SP.dot(U.T,X); if (C==None): Ucovariate=SP.dot(U.T,SP.ones([n,1])); else: if (addBiasTerm): C_=SP.concatenate((C,SP.ones([n,1])),axis=1) Ucovariate=SP.dot(U.T,C_); else: Ucovariate=SP.dot(U.T,C); n_covar=Ucovariate.shape[1]; beta = SP.empty((n_pheno,s,n_covar+1)); LL=SP.ones((n_pheno,s))*(-SP.inf); ldelta=SP.empty((n_pheno,s)); sigg2=SP.empty((n_pheno,s)); pval=SP.ones((n_pheno,s))*(-SP.inf); for phen in SP.arange(n_pheno): UY_=UY[:,phen]; ldelta[phen]=optdelta(UY_,Ucovariate,S,ldeltanull=None,numintervals=numintervals0,ldeltamin=ldeltamin0,ldeltamax=ldeltamax0); for snp in SP.arange(s): UX_=SP.hstack((UX[:,snp:snp+1],Ucovariate)); nLL_, beta_, sigg2_=nLLeval(ldelta[phen,snp],UY_,UX_,S,MLparams=True); beta[phen,snp,:]=beta_; sigg2[phen,snp]=sigg2_; LL[phen,snp]=-nLL_; return beta, ldelta
def _update_indicator(self, K, L):
    """ update the indicator """
    _update = {'term': self.n_terms * SP.ones((K, L)).T.ravel(),
               'row': SP.kron(SP.arange(K)[:, SP.newaxis], SP.ones((1, L))).T.ravel(),
               'col': SP.kron(SP.ones((K, 1)), SP.arange(L)[SP.newaxis, :]).T.ravel()}
    for key in _update.keys():
        self.indicator[key] = SP.concatenate([self.indicator[key], _update[key]])
def do_compare_wedges(file1="stars-82.txt", file2="Stripe82_coadd.csv", stripe=82, mag=0, size=1.0): """ Modify if size is not 1.0 """ one_run = fi.read_data(file1) or_l = len(one_run[:,0]) or_hist = sv.plot_wedge_density(one_run, stripe, q=0.458, r0=19.4, name="_rho1", mag=mag, plot=0, size=size) coadd = fi.read_data(file2) ca_l = len(coadd[:,0]) ca_hist = sv.plot_wedge_density(coadd, stripe, q=0.458, r0=19.4, name="_rho2", mag=mag, plot=0, size=size) # Separate into heights or_h = or_hist[:,1] ca_h = ca_hist[:,1] # Divide the first data set by the second if len(or_h) < len(ca_h): l = len(or_h) extra_h = -0.1*sc.ones((len(ca_h)-l)) else: l = len(ca_h) extra_h = 0.1*sc.ones((len(or_h)-l)) diff_h = sc.zeros(l) for i in range(l): diff_h[i] = ( or_h[i] / ca_h[i] ) out = sc.zeros((l,3)) for i in range(l): out[i,0], out[i,1] = ca_hist[i,0], diff_h[i] out[i,2] = 1.0 #ma.sqrt(or_hist[i,2]*or_hist[i,2] + ca_hist[i,2]*ca_hist[i,2]) return out
def evalgrid1D(f, evalgrid=None, nGrid=10, minval=0.0, maxval=0.99999, dimF=0):
    '''
    evaluate a function f(x) on all values of a grid.
    --------------------------------------------------------------------------
    Input:
    f(x)    : callable target function
    evalgrid: 1-D array prespecified grid of x-values
    nGrid   : number of x-grid points to evaluate f(x)
    minval  : minimum x-value for optimization of f(x)
    maxval  : maximum x-value for optimization of f(x)
    --------------------------------------------------------------------------
    Output:
    evalgrid  : x-values
    resultgrid: f(x)-values
    --------------------------------------------------------------------------
    '''
    if evalgrid is None:
        step = (maxval - minval) / (nGrid)
        evalgrid = SP.arange(minval, maxval + step, step)
    if dimF:
        resultgrid = SP.ones((evalgrid.shape[0], dimF)) * 9999999999999.0
    else:
        resultgrid = SP.ones(evalgrid.shape[0]) * 9999999999999.0
    for i in xrange(evalgrid.shape[0]):
        fevalgrid = f(evalgrid[i])
        is_real = False
        try:
            is_real = SP.isreal(fevalgrid).all()
        except:
            is_real = SP.isreal(fevalgrid)
        assert is_real, "function returned imaginary value"
        resultgrid[i] = fevalgrid
    return (evalgrid, resultgrid)
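# A short usage sketch for evalgrid1D (signature as defined above); the quadratic
# objective here is illustrative only.
import scipy as SP

f = lambda x: (x - 0.3) ** 2              # scalar test function
xgrid, fgrid = evalgrid1D(f, nGrid=10, minval=0.0, maxval=1.0)
i_best = fgrid.argmin()
print(xgrid[i_best], fgrid[i_best])       # grid point closest to the minimum at x = 0.3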
def _generate_masked_mesh(self, cell_mask=None): r""" Generates the mesh based on the cell mask provided """ # if cell_mask is None: cell_mask = sp.ones(self.data_map.shape, dtype=bool) # # initializing arrays self._edges = sp.ones(0, dtype=str) self._merge_patch_pairs = sp.ones(0, dtype=str) self._create_blocks(cell_mask) # # building face arrays mapper = sp.ravel(sp.array(cell_mask, dtype=int)) mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper)) mapper = sp.reshape(mapper, (self.nz, self.nx)) mapper[~cell_mask] = -sp.iinfo(int).max # boundary_dict = { 'bottom': {'bottom': mapper[0, :][cell_mask[0, :]]}, 'top': {'top': mapper[-1, :][cell_mask[-1, :]]}, 'left': {'left': mapper[:, 0][cell_mask[:, 0]]}, 'right': {'right': mapper[:, -1][cell_mask[:, -1]]}, 'front': {'front': mapper[cell_mask]}, 'back': {'back': mapper[cell_mask]}, 'internal': {'bottom': [], 'top': [], 'left': [], 'right': []} } # # determining cells linked to a masked cell cell_mask = sp.where(~sp.ravel(cell_mask))[0] inds = sp.in1d(self._field._cell_interfaces, cell_mask) inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2)) inds = inds[:, 0].astype(int) + inds[:, 1].astype(int) inds = (inds == 1) links = self._field._cell_interfaces[inds] # # adjusting order so masked cells are all on links[:, 1] swap = sp.in1d(links[:, 0], cell_mask) links[swap] = links[swap, ::-1] # # setting side based on index difference sides = sp.ndarray(len(links), dtype='<U6') sides[sp.where(links[:, 1] == links[:, 0]-self.nx)[0]] = 'bottom' sides[sp.where(links[:, 1] == links[:, 0]+self.nx)[0]] = 'top' sides[sp.where(links[:, 1] == links[:, 0]-1)[0]] = 'left' sides[sp.where(links[:, 1] == links[:, 0]+1)[0]] = 'right' # # adding each block to the internal face dictionary inds = sp.ravel(mapper)[links[:, 0]] for side, block_id in zip(sides, inds): boundary_dict['internal'][side].append(block_id) self.set_boundary_patches(boundary_dict, reset=True)
def alloc_numpy_arrays(number_cells, space_direction, initval=0, dtype='f'):
    """
    """
    space = [sc.ones((1, 1, 1), dtype),
             sc.ones((1, 1, 1), dtype),
             sc.ones((1, 1, 1), dtype)]
    number_cells = tuple(number_cells)

    if 'x' in space_direction:
        space[x_axis] = sc.zeros(number_cells, dtype)
    if 'y' in space_direction:
        space[y_axis] = sc.zeros(number_cells, dtype)
    if 'z' in space_direction:
        space[z_axis] = sc.zeros(number_cells, dtype)

    if initval != 0:
        if len(number_cells) == 3:
            space[x_axis][:, :, :] = initval
            space[y_axis][:, :, :] = initval
            space[z_axis][:, :, :] = initval
        elif len(number_cells) == 2:
            space[x_axis][:, :] = initval
            space[y_axis][:, :] = initval
            space[z_axis][:, :] = initval

    return space
def __init__(self, typ, numOGaus=10, alphaA=0.02, alphaM=0.02, alphaS=0.02): self.typ = typ self.alphaA = alphaA self.alphaM = alphaM self.alphaS = alphaS self.minSig = 0.000001 self.numOGaus = numOGaus #Number of Gaussians self.rangeMin = -20.0 self.rangeMax = 20.0 self.epsilon = (self.rangeMax - self.rangeMin) / (sqrt(2.0) * float(self.numOGaus - 1)) #Initial value of sigmas self.propFakt = 1.0 / float(self.numOGaus) self.distFakt = 1.0 / float(self.numOGaus - 1) self.distRange = self.rangeMax - self.rangeMin self.sigma = ones(self.numOGaus) self.mue = zeros(self.numOGaus) self.alpha = ones(self.numOGaus) self.sigma *= self.epsilon self.alpha /= float(self.numOGaus) self.alpha = self.invSigmo(self.alpha) for i in range(self.numOGaus): self.mue[i] = self.distRange * float(i) * self.distFakt + self.rangeMin self.baseline = 0.0 self.best = 0.000001
def __init__(self, evaluator, evaluable, **parameters): BlackBoxOptimizer.__init__(self, evaluator, evaluable, **parameters) self.numParams = self.xdim + self.xdim * (self.xdim+1) / 2 if self.momentum != None: self.momentumVector = zeros(self.numParams) if self.learningRateSigma == None: self.learningRateSigma = self.learningRate if self.rangemins == None: self.rangemins = -ones(self.xdim) if self.rangemaxs == None: self.rangemaxs = ones(self.xdim) if self.initCovariances == None: if self.diagonalOnly: self.initCovariances = ones(self.xdim) else: self.initCovariances = eye(self.xdim) self.x = rand(self.xdim) * (self.rangemaxs-self.rangemins) + self.rangemins self.sigma = dot(eye(self.xdim), self.initCovariances) self.factorSigma = cholesky(self.sigma) self.reset()
def make_data_twoclass(N=50):
    # generates some toy data
    mu = sp.array([[0, 2], [0, -2]]).T
    C = sp.array([[5., 4.], [4., 5.]])
    X = sp.hstack((mvn(mu[:, 0], C, N // 2).T, mvn(mu[:, 1], C, N // 2).T))
    Y = sp.hstack((sp.ones((1, N // 2)), -sp.ones((1, N // 2))))
    return X, Y
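# Usage sketch for make_data_twoclass, assuming `sp` is scipy/numpy and
# `mvn` is numpy.random.multivariate_normal.
X, Y = make_data_twoclass(N=100)
print(X.shape)   # (2, 100): two features, 100 samples
print(Y.shape)   # (1, 100): labels in {+1, -1}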
def addFixedEffect(self, F=None, A=None, Ftest=None):
    """
    add fixed effect term to the model

    Args:
        F:     sample design matrix for the fixed effect [N,K]
        A:     trait design matrix for the fixed effect
               (e.g. sp.ones((1,P)) common effect; sp.eye(P) any effect) [L,P]
        Ftest: sample design matrix for test samples [Ntest,K]
    """
    if A is None:
        A = sp.eye(self.P)
    if F is None:
        F = sp.ones((self.N, 1))
        if self.Ntest is not None:
            Ftest = sp.ones((self.Ntest, 1))
    assert A.shape[1] == self.P, 'VarianceDecomposition:: A has incompatible shape'
    assert F.shape[0] == self.N, 'VarianceDecomposition:: F has incompatible shape'

    if Ftest is not None:
        assert self.Ntest is not None, 'VarianceDecomposition:: specify Ntest for predictions (method VarianceDecomposition::setTestSampleSize)'
        assert Ftest.shape[0] == self.Ntest, 'VarianceDecomposition:: Ftest has incompatible shape'
        assert Ftest.shape[1] == F.shape[1], 'VarianceDecomposition:: Ftest has incompatible shape'

    # add fixed effect
    self.sample_designs.append(F)
    self.sample_test_designs.append(Ftest)
    self.trait_designs.append(A)
    self._desync()
def __init__(self, evaluator, evaluable, **parameters): BlackBoxOptimizer.__init__(self, evaluator, evaluable, **parameters) self.alphas = ones(self.numberOfCenters)/self.numberOfCenters self.mus = [] self.sigmas = [] self.tau = 1. if self.rangemins == None: self.rangemins = -ones(self.xdim) if self.rangemaxs == None: self.rangemaxs = ones(self.xdim) if self.initCovariances == None: self.initCovariances = eye(self.xdim) if self.elitist and self.numberOfCenters == 1 and not self.noisyEvaluator: # in the elitist case seperate evaluations are not necessary. # CHECKME: maybe in the noisy case? self.evalMus = False assert not(self.useCauchy and self.numberOfCenters > 1) for dummy in range(self.numberOfCenters): self.mus.append(rand(self.xdim) * (self.rangemaxs-self.rangemins) + self.rangemins) self.sigmas.append(dot(eye(self.xdim), self.initCovariances)) self.reset()
def makedata(testpath):
    """ This will make the input data for the test case. The data will have the
        default set of parameters Ne=1e11 and Te=Ti=2000.
        Inputs
            testpath - Directory that will hold the data.
    """
    finalpath = testpath.joinpath('Origparams')
    if not finalpath.exists():
        finalpath.mkdir()
    data = SIMVALUES
    z = sp.linspace(50., 1e3, 50)
    nz = len(z)
    params = sp.tile(data[sp.newaxis, sp.newaxis, :, :], (nz, 1, 1, 1))
    coords = sp.column_stack((sp.ones(nz), sp.ones(nz), z))
    species = ['O+', 'e-']
    times = sp.array([[0, 1e3]])
    vel = sp.zeros((nz, 1, 3))
    Icont1 = IonoContainer(coordlist=coords, paramlist=params, times=times,
                           sensor_loc=sp.zeros(3), ver=0, coordvecs=['x', 'y', 'z'],
                           paramnames=None, species=species, velocity=vel)

    finalfile = finalpath.joinpath('0 stats.h5')
    Icont1.saveh5(str(finalfile))
    # set start temp to 1000 K.
    Icont1.Param_List[:, :, :, 1] = 1e3
    Icont1.saveh5(str(testpath.joinpath('startfile.h5')))
def gensquexpIPdraw(d,lb,ub,sl,su,sfn,sls,cfn): #axis = 0 value = sl #d dimensional objective +1 for s nt=25 #print sp.hstack([sp.array([[sl]]),lb]) #print sp.hstack([sp.array([[su]]),ub]) [X,Y,S,D] = ESutils.gen_dataset(nt,d+1,sp.hstack([sp.array([[sl]]),lb]).flatten(),sp.hstack([sp.array([[su]]),ub]).flatten(),GPdc.SQUEXP,sp.array([1.5]+[sls]+[0.30]*d)) G = GPdc.GPcore(X,Y,S,D,GPdc.kernel(GPdc.SQUEXP,d+1,sp.array([1.5]+[sls]+[0.30]*d))) def obj(x,s,d,override=False): x = x.flatten() if sfn(x)==0. or override: noise = 0. else: noise = sp.random.normal(scale=sp.sqrt(sfn(x))) return [G.infer_m(x,[d])[0,0]+noise,cfn(x)] def dirwrap(x,y): z = obj(sp.array([[sl]+[i for i in x]]),sl,[sp.NaN],override=True) return (z,0) [xmin0,ymin0,ierror] = DIRECT.solve(dirwrap,lb,ub,user_data=[], algmethod=1, maxf=89000, logfilename='/dev/null') lb2 = xmin0-sp.ones(d)*1e-4 ub2 = xmin0+sp.ones(d)*1e-4 [xmin,ymin,ierror] = DIRECT.solve(dirwrap,lb2,ub2,user_data=[], algmethod=1, maxf=89000, logfilename='/dev/null') #print "RRRRR"+str([xmin0,xmin,ymin0,ymin,xmin0-xmin,ymin0-ymin]) return [obj,xmin,ymin]
def plot_median_errors(RefinementLevels): for i in RefinementLevels[0].cases: x =[]; y =[]; print "Analyzing median error on: ", i ; for r in RefinementLevels: x.append(r.LUT.D_dim*r.LUT.P_dim) r.get_REL_ERR_SU2(i) y.append(r.SU2[i].median_ERR*100) x = sp.array(x) y = sp.array(y) y = y[sp.argsort(x)] x = x[sp.argsort(x)] LHM = sp.ones((len(x),2)) RHS = sp.ones((len(x),1)) LHM[:,1] = sp.log10(x) RHS[:,0] = sp.log10(y) sols = sp.linalg.lstsq(LHM,RHS) b = -sols[0][1] plt.loglog(x,y, label='%s, %s'%(i,r'$O(\frac{1}{N})^{%s}$'%str(sp.around(b,2))), basex=10, basey=10, \ subsy=sp.linspace(10**(-5), 10**(-2),20),\ subsx=sp.linspace(10**(2), 10**(5),50)) #for r in RefinementLevels: # x.append(r.LUT.D_dim*r.LUT.P_dim) # r.get_REL_ERR_SciPy(i) # y.append(r.SciPy[i].median_ERR*100) #plt.plot(x,y, label='SciPy: %s'%i) plt.grid(which='both') plt.xlabel('Grid Nodes (N)') plt.ylabel('Median relative error [%]') return;
def lossTraces(fwrap, aclass, dim, maxsteps, storesteps=None, x0=None, initNoise=0., minLoss=1e-10, algoparams={}): """ Compute a number of loss curves, for the provided settings, stored at specific storestep points. """ if not storesteps: storesteps = range(maxsteps + 1) # initial points, potentially noisy if x0 is None: x0 = ones(dim) + randn(dim) * initNoise # tracking progress by callback paramtraces = {'index':-1} def storer(a): lastseen = paramtraces['index'] for ts in [x for x in storesteps if x > lastseen and x <= a._num_updates]: paramtraces[ts] = a.bestParameters.copy() paramtraces['index'] = a._num_updates # initialization algo = aclass(fwrap, x0, callback=storer, **algoparams) print algo, fwrap, dim, maxsteps, # store initial step algo.callback(algo) algo.run(maxsteps) # process learning curve del paramtraces['index'] paramtraces = array([x for _, x in sorted(paramtraces.items())]) oloss = mean(fwrap.stochfun.expectedLoss(ones(100) * fwrap.stochfun.optimum)) ls = abs(fwrap.stochfun.expectedLoss(ravel(paramtraces)) - oloss) + minLoss ls = reshape(ls, paramtraces.shape) print median(ls[-1]) return ls
def range_query_geno_local(self, idx_start=None, idx_end=None, chrom=None,pos_start=None, pos_end=None,windowsize=0): """ return an index for a range query on the genotypes """ if idx_start==None and idx_end==None and pos_start==None and pos_end==None and chrom==None: return sp.arange(0,self.num_snps) elif idx_start is not None or idx_end is not None: if idx_start is None: idx_start = 0 if idx_end is None: idx_end = self.num_snps res = sp.arange(idx_start,idx_end) return res elif chrom is not None: res = self.geno_pos["chrom"]==chrom elif pos_start is not None or pos_end is not None: if pos_start is not None and pos_end is not None: assert pos_start[0] == pos_end[0], "chromosomes have to match" if pos_start is None: idx_larger = sp.ones(self.num_snps,dtype=bool) else: idx_larger = (self.geno_pos["pos"]>=(pos_start[1]-windowsize)) & (self.geno_pos["chrom"]==pos_start[0]) if pos_end is None: idx_smaller = sp.ones(self.num_snps,dtype=bool) else: idx_smaller = (self.geno_pos["pos"]<=(pos_end[1]+windowsize)) & (self.geno_pos["chrom"]==pos_end[0]) res = idx_smaller & idx_larger else: raise Exception("This should not be triggered")#res = sp.ones(self.geno_pos.shape,dtype=bool) return sp.where(res)[0]
print(last + ' Arnorm = %12.4e' % (Arnorm, )) print(last + msg[istop + 1]) if istop == 6: info = maxiter else: info = 0 return (postprocess(x), info) if __name__ == '__main__': from scipy import ones, arange from scipy.linalg import norm from scipy.sparse import spdiags n = 10 residuals = [] def cb(x): residuals.append(norm(b - A * x)) #A = poisson((10,),format='csr') A = spdiags([arange(1, n + 1, dtype=float)], [0], n, n, format='csr') M = spdiags([1.0 / arange(1, n + 1, dtype=float)], [0], n, n, format='csr') A.psolve = M.matvec b = 0 * ones(A.shape[0]) x = minres(A, b, tol=1e-12, maxiter=None, callback=cb) #x = cg(A,b,x0=b,tol=1e-12,maxiter=None,callback=cb)[0]
def cvglmnet(*, x, y, family = 'gaussian', ptype = 'default', nfolds = 10, foldid = scipy.empty([0]), parallel = 1, keep = False, grouped = True, **options): options = glmnetSet(options) if 0 < len(options['lambdau']) < 2: raise ValueError('Need more than one value of lambda for cv.glmnet') nobs = x.shape[0] # we should not really need this. user must supply the right shape # if y.shape[0] != nobs: # y = scipy.transpose(y) # convert 1d python array of size nobs to 2d python array of size nobs x 1 if len(y.shape) == 1: y = scipy.reshape(y, [y.size, 1]) # we should not really need this. user must supply the right shape # if (len(options['offset']) > 0) and (options['offset'].shape[0] != nobs): # options['offset'] = scipy.transpose(options['offset']) if len(options['weights']) == 0: options['weights'] = scipy.ones([nobs, 1], dtype = scipy.float64) # main call to glmnet glmfit = glmnet(x = x, y = y, family = family, **options) is_offset = glmfit['offset'] options['lambdau'] = glmfit['lambdau'] nz = glmnetPredict(glmfit, scipy.empty([0]), scipy.empty([0]), 'nonzero') if glmfit['class'] == 'multnet': nnz = scipy.zeros([len(options['lambdau']), len(nz)]) for i in range(len(nz)): nnz[:, i] = scipy.transpose(scipy.sum(nz[i], axis = 0)) nz = scipy.ceil(scipy.median(nnz, axis = 1)) elif glmfit['class'] == 'mrelnet': nz = scipy.transpose(scipy.sum(nz[0], axis = 0)) else: nz = scipy.transpose(scipy.sum(nz, axis = 0)) if len(foldid) == 0: ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(nobs/nfolds))]) mb = scipy.arange(scipy.mod(nobs, nfolds)) mb = scipy.reshape(mb, [1, mb.size]) population = scipy.append(ma, mb, axis = 1) mc = scipy.random.permutation(len(population)) mc = mc[0:nobs] foldid = population[mc] foldid = scipy.reshape(foldid, [foldid.size,]) else: nfolds = scipy.amax(foldid) + 1 if nfolds < 3: raise ValueError('nfolds must be bigger than 3; nfolds = 10 recommended') cpredmat = list() foldid = scipy.reshape(foldid, [foldid.size, ]) if parallel != 1: if parallel == -1: num_cores = multiprocessing.cpu_count() else num_cores = parallel sys.stderr.write("[status]\tParallel glmnet cv with " + str(num_cores) + " cores\n") cpredmat = joblib.Parallel(n_jobs=num_cores)(joblib.delayed(doCV)(i, x, y, family, foldid, nfolds, is_offset, **options) for i in range(nfolds)) else: for i in range(nfolds): newFit = doCV(i, x, y, family, foldid, nfolds, is_offset, **options) cpredmat.append(newFit) if cpredmat[0]['class'] == 'elnet': cvstuff = cvelnet( cpredmat, options['lambdau'], x, y \ , options['weights'], options['offset'] \ , foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'lognet': cvstuff = cvlognet(cpredmat, options['lambdau'], x, y \ , options['weights'], options['offset'] \ , foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'multnet': cvstuff = cvmultnet(cpredmat, options['lambdau'], x, y \ , options['weights'], options['offset'] \ , foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'mrelnet': cvstuff = cvmrelnet(cpredmat, options['lambdau'], x, y \ , options['weights'], options['offset'] \ , foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'fishnet': cvstuff = cvfishnet(cpredmat, options['lambdau'], x, y \ , options['weights'], options['offset'] \ , foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'coxnet': raise NotImplementedError('Cross-validation for coxnet not implemented yet.') #cvstuff = cvcoxnet(cpredmat, options['lambdau'], x, y \ # , options['weights'], options['offset'] \ # , foldid, ptype, grouped, keep) cvm = cvstuff['cvm'] 
cvsd = cvstuff['cvsd'] cvname = cvstuff['name'] CVerr = dict() CVerr['lambdau'] = options['lambdau'] CVerr['cvm'] = scipy.transpose(cvm) CVerr['cvsd'] = scipy.transpose(cvsd) CVerr['cvup'] = scipy.transpose(cvm + cvsd) CVerr['cvlo'] = scipy.transpose(cvm - cvsd) CVerr['nzero'] = nz CVerr['name'] = cvname CVerr['glmnet_fit'] = glmfit if keep: CVerr['fit_preval'] = cvstuff['fit_preval'] CVerr['foldid'] = foldid if ptype == 'auc': cvm = -cvm CVerr['lambda_min'] = scipy.amax(options['lambdau'][cvm <= scipy.amin(cvm)]).reshape([1]) idmin = options['lambdau'] == CVerr['lambda_min'] semin = cvm[idmin] + cvsd[idmin] CVerr['lambda_1se'] = scipy.amax(options['lambdau'][cvm <= semin]).reshape([1]) CVerr['class'] = 'cvglmnet' return(CVerr)
def _create_feature_vector(pixel_group): """ Generates the feature vector, given a square bunch of pixels. ``pixel_group`` by itself is actually a list of pixel groups (sub-images). Each sub-image will become a different part of the vector """ # Initialise some values that we'll use later feature_vector = sp.empty(FEATURE_VECTOR_SIZE) num_pixels = pixel_group[0].shape[0] # Find the angles of each point in degrees x = sp.arange(-num_pixels/2, num_pixels/2) grid = sp.meshgrid(x, x) angle = sp.angle(grid[0] + 1j*grid[1], deg=True) # Create histrogram buckets and find indices of the spectrum array that # fall into a particular bucket diff = -360/NUM_HISTOGRAM_BUCKETS buckets = sp.arange(180, -180+diff, diff) indices = {} for i in range(0, NUM_HISTOGRAM_BUCKETS): indices[i] = sp.where((angle <= buckets[i]) * (angle > buckets[i+1]) ) buckets = buckets[:-1] # Average out the Cb and Cr components and add it to the feature vector feature_vector[0] = sp.dot(sp.ones((1, num_pixels)), pixel_group[0].dot(sp.ones((num_pixels, 1)))) feature_vector[0] /= num_pixels * num_pixels feature_vector[1] = sp.dot(sp.ones((1, num_pixels)), pixel_group[1].dot(sp.ones((num_pixels, 1)))) feature_vector[1] /= num_pixels * num_pixels # The other five elements are the orientation entropies at different scales for i in range(2, FEATURE_VECTOR_SIZE): # First calculate the centre-shifted fourier transform of the pixel # group, and then apply a log transformation to get the magnitude # spectrum transformed_pixel_group = np.fft.fft2(pixel_group[i]) centre_shifted_pixel_group = np.fft.fftshift(transformed_pixel_group) fourier_spectrum = sp.log(abs(centre_shifted_pixel_group) + 1) # Calculate the orientation histogram of the log magnitude spectrum # by summing over groups of angles. The histogram value at a given # angle should give the power in the log magnitude spectrum around that # angle (approximately) histogram = sp.empty(buckets.shape) for j in range(NUM_HISTOGRAM_BUCKETS): histogram[j] = fourier_spectrum[indices[j]].sum() # Finally, calculate the orientation entropy based on the standard # statistical formula: # E = H(θ) * log(H(θ)) if not histogram.all(): entropy = 0 else: entropy = - (histogram * sp.log(histogram)).sum() if sp.isnan(entropy): print histogram print fourier_spectrum sys.exit(1) # The scaling attempts to make the entropy value the same order as the # Cb and Cr values. This does not guarantee a range of 0-255 however. scaling = (BASE_PIXEL_GROUP_SIZE / num_pixels) ** 2 feature_vector[i] = entropy * scaling return feature_vector
from scipy.interpolate import griddata mesh_x = S.loadtxt("mesh_x.txt") mesh_y = S.loadtxt("mesh_y.txt") the_splines = list() for i in range(mesh_x.shape[0]): the_splines.append(ParametricSpline(mesh_x[i], mesh_y[i])) SAMPLE_NUMBER = 100 ts = S.linspace(0.0, 1.0, SAMPLE_NUMBER) old_xy = S.vstack([aspline(ts) for aspline in the_splines]) new_xy = S.vstack([ S.hstack([i * S.ones((SAMPLE_NUMBER, 1)), ts.reshape(-1, 1)]) for i in range(len(the_splines)) ]) new_xs = griddata(old_xy, new_xy[:, 0], (x, z), method='linear') new_ys = griddata(old_xy, new_xy[:, 1], (x, z), method='linear') disp_genes = [ "kni__3", "D__3", "hbP__3", "bcdP__3", "KrP__3", "gt__3", "eve__3", "odd__3", "rho__3", "sna__3" ] #disp_genes = ["eve__3"] for one_gene_name in disp_genes: colnum = results[0]["column"].index(one_gene_name) - 1
def create_incidence_matrix(self, weights=None, fmt='coo', drop_zeros=False): r""" Creates a weighted incidence matrix in the desired sparse format Parameters ---------- weights : array_like, optional An array containing the throat values to enter into the matrix (In graph theory these are known as the 'weights'). If omitted, ones are used to create a standard incidence matrix representing connectivity only. fmt : string, optional The sparse storage format to return. Options are: **'coo'** : (default) This is the native format of OpenPNMs data **'lil'** : Enables row-wise slice of the matrix **'csr'** : Favored by most linear algebra routines **'dok'** : Enables subscript access of locations drop_zeros : boolean (default is ``False``) If ``True``, applies the ``eliminate_zeros`` method of the sparse array to remove all zero locations. Returns ------- An incidence matrix in the specified sparse format Notes ----- The incidence matrix is a cousin to the adjacency matrix, and used by OpenPNM for finding the throats connected to a give pore or set of pores. Specifically, an incidence matrix has Np rows and Nt columns, and each row represents a pore, containing non-zero values at the locations corresponding to the indices of the throats connected to that pore. The ``weights`` argument indicates what value to place at each location, with the default being 1's to simply indicate connections. Another useful option is throat indices, such that the data values on each row indicate which throats are connected to the pore, though this is redundant as it is identical to the locations of non-zeros. Examples -------- >>> import openpnm as op >>> pn = op.network.Cubic(shape=[5, 5, 5]) >>> weights = sp.rand(pn.num_throats(), ) < 0.5 >>> im = pn.create_incidence_matrix(weights=weights, fmt='csr') """ # Check if provided data is valid if weights is None: weights = sp.ones((self.Nt, ), dtype=int) elif sp.shape(weights)[0] != self.Nt: raise Exception('Received dataset of incorrect length') conn = self['throat.conns'] row = conn[:, 0] row = sp.append(row, conn[:, 1]) col = sp.arange(self.Nt) col = sp.append(col, col) weights = sp.append(weights, weights) temp = sprs.coo.coo_matrix((weights, (row, col)), (self.Np, self.Nt)) if drop_zeros: temp.eliminate_zeros() # Convert to requested format if fmt == 'coo': pass # temp is already in coo format elif fmt == 'csr': temp = temp.tocsr() elif fmt == 'lil': temp = temp.tolil() elif fmt == 'dok': temp = temp.todok() return temp
def create_adjacency_matrix(self, weights=None, fmt='coo', triu=False, drop_zeros=False): r""" Generates a weighted adjacency matrix in the desired sparse format Parameters ---------- weights : array_like, optional An array containing the throat values to enter into the matrix (in graph theory these are known as the 'weights'). If the array is Nt-long, it implies that the matrix is symmetric, so the upper and lower triangular regions are mirror images. If it is 2*Nt-long then it is assumed that the first Nt elements are for the upper triangle, and the last Nt element are for the lower triangular. If omitted, ones are used to create a standard adjacency matrix representing connectivity only. fmt : string, optional The sparse storage format to return. Options are: **'coo'** : (default) This is the native format of OpenPNM data **'lil'** : Enables row-wise slice of the matrix **'csr'** : Favored by most linear algebra routines **'dok'** : Enables subscript access of locations triu : boolean (default is ``False``) If ``True``, the returned sparse matrix only contains the upper- triangular elements. This argument is ignored if the ``weights`` array is 2*Nt-long. drop_zeros : boolean (default is ``False``) If ``True``, applies the ``eliminate_zeros`` method of the sparse array to remove all zero locations. Returns ------- An adjacency matrix in the specified Scipy sparse format. Notes ----- The adjacency matrix is used by OpenPNM for finding the pores connected to a give pore or set of pores. Specifically, an adjacency matrix has Np rows and Np columns. Each row represents a pore, containing non-zero values at the locations corresponding to the indices of the pores connected to that pore. The ``weights`` argument indicates what value to place at each location, with the default being 1's to simply indicate connections. Another useful option is throat indices, such that the data values on each row indicate which throats are connected to the pore. Examples -------- >>> import openpnm as op >>> pn = op.network.Cubic(shape=[5, 5, 5]) >>> weights = sp.rand(pn.num_throats(), ) < 0.5 >>> am = pn.create_adjacency_matrix(weights=weights, fmt='csr') """ # Check if provided data is valid if weights is None: weights = sp.ones((self.Nt, ), dtype=int) elif sp.shape(weights)[0] not in [self.Nt, 2 * self.Nt, (self.Nt, 2)]: raise Exception('Received weights are of incorrect length') # Append row & col to each other, and data to itself conn = self['throat.conns'] row = conn[:, 0] col = conn[:, 1] if weights.shape == (2 * self.Nt, ): row = sp.append(row, conn[:, 1]) col = sp.append(col, conn[:, 0]) elif weights.shape == (self.Nt, 2): row = sp.append(row, conn[:, 1]) col = sp.append(col, conn[:, 0]) weights = weights.flatten(order='F') elif not triu: row = sp.append(row, conn[:, 1]) col = sp.append(col, conn[:, 0]) weights = sp.append(weights, weights) # Generate sparse adjacency matrix in 'coo' format temp = sprs.coo_matrix((weights, (row, col)), (self.Np, self.Np)) if drop_zeros: temp.eliminate_zeros() # Convert to requested format if fmt == 'coo': pass # temp is already in coo format elif fmt == 'csr': temp = temp.tocsr() elif fmt == 'lil': temp = temp.tolil() elif fmt == 'dok': temp = temp.todok() return temp
def calc_risk_scores(bed_file, rs_id_map, phen_map, out_file=None, split_by_chrom=False, adjust_for_sex=False, adjust_for_covariates=False, adjust_for_pcs=False, non_zero_chromosomes=None, only_score=False, verbose=False, summary_dict=None): print('Parsing PLINK bed file: %s' % bed_file) if split_by_chrom: num_individs = len(phen_map) assert num_individs > 0, 'No individuals found. Problems parsing the phenotype file?' pval_derived_effects_prs = sp.zeros(num_individs) for i in range(1, 23): if non_zero_chromosomes is None or i in non_zero_chromosomes: genotype_file = bed_file + '_%i_keep' % i if os.path.isfile(genotype_file + '.bed'): if verbose: print('Working on chromosome %d' % i) prs_dict = get_prs(genotype_file, rs_id_map, phen_map, only_score=only_score, verbose=verbose) pval_derived_effects_prs += prs_dict[ 'pval_derived_effects_prs'] elif verbose: print('Skipping chromosome') else: prs_dict = get_prs(bed_file, rs_id_map, phen_map, only_score=only_score, verbose=verbose) num_individs = len(prs_dict['iids']) pval_derived_effects_prs = prs_dict['pval_derived_effects_prs'] if only_score: write_only_scores_file(out_file, prs_dict, pval_derived_effects_prs) res_dict = {} elif sp.std(prs_dict['true_phens']) == 0: print('No variance left to explain in phenotype.') res_dict = {'pred_r2': 0} else: # Report prediction accuracy assert len( phen_map ) > 0, 'No individuals found. Problems parsing the phenotype file?' pval_eff_corr = sp.corrcoef(pval_derived_effects_prs, prs_dict['true_phens'])[0, 1] pval_eff_r2 = pval_eff_corr**2 res_dict = {'pred_r2': pval_eff_r2} pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1) true_phens = sp.array(prs_dict['true_phens']) true_phens.shape = (len(true_phens), 1) # Store covariate weights, slope, etc. 
weights_dict = {} # Store Adjusted predictions adj_pred_dict = {} # Direct effect Xs = sp.hstack( [pval_derived_effects_prs, sp.ones((len(true_phens), 1))]) (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)), true_phens) (betas, rss, r, s) = linalg.lstsq(Xs, true_phens) pred_r2 = 1 - rss / rss00 weights_dict['unadjusted'] = { 'Intercept': betas[1][0], 'ldpred_prs_effect': betas[0][0] } if verbose: print('PRS correlation: %0.4f' % pval_eff_corr) print('Variance explained (Pearson R2) by PRS: %0.4f' % pred_r2) # Adjust for sex if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0: sex = sp.array(prs_dict['sex']) sex.shape = (len(sex), 1) (betas, rss0, r, s) = linalg.lstsq(sp.hstack([sex, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack( [pval_derived_effects_prs, sex, sp.ones((len(true_phens), 1))]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) weights_dict['sex_adj'] = { 'Intercept': betas[2][0], 'ldpred_prs_effect': betas[0][0], 'sex': betas[1][0] } if verbose: print( 'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:', betas) adj_pred_dict['sex_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Sex_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS + Sex : %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Sex_adj_pred_r2+Sex'] = pred_r2 # Adjust for PCs if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0: pcs = prs_dict['pcs'] (betas, rss0, r, s) = linalg.lstsq(sp.hstack([pcs, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack( [pval_derived_effects_prs, sp.ones((len(true_phens), 1)), pcs]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) weights_dict['pc_adj'] = { 'Intercept': betas[1][0], 'ldpred_prs_effect': betas[0][0], 'pcs': betas[2][0] } adj_pred_dict['pc_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for PCs: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS + PCs: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_adj_pred_r2+PC'] = pred_r2 # Adjust for both PCs and Sex if adjust_for_sex and 'sex' in prs_dict and len( prs_dict['sex']) > 0: sex = sp.array(prs_dict['sex']) sex.shape = (len(sex), 1) (betas, rss0, r, s) = linalg.lstsq( sp.hstack([sex, pcs, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack([ pval_derived_effects_prs, sex, sp.ones((len(true_phens), 1)), pcs ]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) weights_dict['sex_pc_adj'] = { 'Intercept': betas[2][0], 'ldpred_prs_effect': betas[0][0], 'sex': betas[1][0], 'pcs': betas[3][0] } adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for PCs and Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_Sex_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS+PCs+Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2 # Adjust for covariates if adjust_for_covariates and 'covariates' in prs_dict and len( prs_dict['covariates']) > 0: covariates = 
prs_dict['covariates'] (betas, rss0, r, s) = linalg.lstsq( sp.hstack([covariates, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack([ pval_derived_effects_prs, covariates, sp.ones((len(true_phens), 1)) ]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) adj_pred_dict['cov_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for Covariates: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS + Cov: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_adj_pred_r2+Cov'] = pred_r2 if adjust_for_pcs and 'pcs' in prs_dict and len( prs_dict['pcs']) and 'sex' in prs_dict and len( prs_dict['sex']) > 0: pcs = prs_dict['pcs'] sex = sp.array(prs_dict['sex']) sex.shape = (len(sex), 1) (betas, rss0, r, s) = linalg.lstsq( sp.hstack( [covariates, sex, pcs, sp.ones((len(true_phens), 1))]), true_phens) Xs = sp.hstack([ pval_derived_effects_prs, covariates, sex, pcs, sp.ones((len(true_phens), 1)) ]) (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens) adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas) pred_r2 = 1 - rss_pd / rss0 print( 'Variance explained (Pearson R2) by PRS adjusted for Cov+PCs+Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2 pred_r2 = 1 - rss_pd / rss00 print( 'Variance explained (Pearson R2) by PRS+Cov+PCs+Sex: %0.4f (%0.6f)' % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs))) res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2 # Now calibration y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens) denominator = sp.dot(pval_derived_effects_prs.T, pval_derived_effects_prs) numerator = sp.dot(pval_derived_effects_prs.T, y_norm) regression_slope = (numerator / denominator)[0][0] if verbose: print('The slope for predictions with weighted effects is: %0.4f' % regression_slope) num_individs = len(prs_dict['pval_derived_effects_prs']) # Write PRS out to file. if out_file != None: write_scores_file(out_file, prs_dict, pval_derived_effects_prs, adj_pred_dict, weights_dict=weights_dict) return res_dict
def from_networkx(cls, G, project=None): r""" Add data to an OpenPNM Network from a undirected NetworkX graph object. Parameters ---------- G : networkx.classes.graph.Graph Object The NetworkX graph. G should be undirected. The numbering of nodes should be numeric (int's), zero-based and should not contain any gaps, i.e. ``G.nodes() = [0,1,3,4,5]`` is not allowed and should be mapped to ``G.nodes() = [0,1,2,3,4]``. project : OpenPNM Project object A GenericNetwork is created and added to the specified Project. If no Project is supplied then one will be created and returned. Returns ------- An OpenPNM Project containing a GenericNetwork with all the data from the NetworkX object. """ net = {} # Ensure G is an undirected networkX graph with numerically numbered # nodes for which numbering starts at 0 and does not contain any gaps if not isinstance(G, nx.Graph): raise ('Provided object is not a NetworkX graph.') if nx.is_directed(G): raise ('Provided graph is directed. Convert to undirected graph.') if not all(isinstance(n, int) for n in G.nodes()): raise ('Node numbering is not numeric. Convert to int.') if min(G.nodes()) != 0: raise ('Node numbering does not start at zero.') if max(G.nodes()) + 1 != len(G.nodes()): raise ('Node numbering contains gaps. Map nodes to remove gaps.') # Parsing node data Np = len(G) net.update({'pore.all': sp.ones((Np, ), dtype=bool)}) for n, props in G.nodes(data=True): for item in props.keys(): val = props[item] dtype = type(val) # Remove prepended pore. and pore_ if present for b in ['pore.', 'pore_']: item = item.replace(b, '') # Create arrays for subsequent indexing, if not present already if 'pore.' + item not in net.keys(): if dtype == str: # handle strings of arbitrary length net['pore.' + item] = sp.ndarray((Np, ), dtype='object') elif dtype is list: dtype = type(val[0]) if dtype == str: dtype = 'object' cols = len(val) net['pore.' + item] = sp.ndarray((Np, cols), dtype=dtype) else: net['pore.' + item] = sp.ndarray((Np, ), dtype=dtype) net['pore.' + item][n] = val # Parsing edge data # Deal with conns explicitly try: conns = list(G.edges) # NetworkX V2 except: conns = G.edges() # NetworkX V1 conns.sort() # Add conns to Network Nt = len(conns) net.update({'throat.all': sp.ones(Nt, dtype=bool)}) net.update({'throat.conns': sp.array(conns)}) # Scan through each edge and extract all its properties i = 0 for t in conns: props = G[t[0]][t[1]] for item in props: val = props[item] dtype = type(val) # Remove prepended throat. and throat_ if present for b in ['throat.', 'throat_']: item = item.replace(b, '') # Create arrays for subsequent indexing, if not present already if 'throat.' + item not in net.keys(): if dtype == str: net['throat.' + item] = sp.ndarray((Nt, ), dtype='object') if dtype is list: dtype = type(val[0]) if dtype == str: dtype = 'object' cols = len(val) net['throat.' + item] = sp.ndarray((Nt, cols), dtype=dtype) else: net['throat.' + item] = sp.ndarray((Nt, ), dtype=dtype) net['throat.' + item][i] = val i += 1 network = GenericNetwork(project=project) network = cls._update_network(network=network, net=net) return network.project
def entry_point(): parser = OptionParser() # input files parser.add_option("--bfile", dest='bfile', type=str, default=None) parser.add_option("--pfile", dest='pfile', type=str, default=None) parser.add_option("--efile", dest='efile', type=str, default=None) parser.add_option("--ffile", dest='ffile', type=str, default=None) # output file parser.add_option("--ofile", dest='ofile', type=str, default=None) # phenotype filtering parser.add_option("--pheno_id", dest='pheno_id', type=str, default=None) # snp filtering options parser.add_option("--idx_start", dest='i0', type=int, default=None) parser.add_option("--idx_end", dest='i1', type=int, default=None) parser.add_option("--chrom", dest='chrom', type=int, default=None) parser.add_option("--pos_start", dest='pos_start', type=int, default=None) parser.add_option("--pos_end", dest='pos_end', type=int, default=None) # size of batches to load into memory parser.add_option( "--batch_size", dest='batch_size', type=int, default=1000) # analysis options parser.add_option("--rhos", dest='rhos', type=str, default=None) parser.add_option( "--unique_variants", action="store_true", dest='unique_variants', default=False) parser.add_option( "--no_interaction_test", action="store_true", dest='no_interaction_test', default=False) (opt, args) = parser.parse_args() # assert stuff assert opt.bfile is not None, 'Specify bed file!' assert opt.pfile is not None, 'Specify pheno file!' assert opt.efile is not None, 'Specify env file!' assert opt.ofile is not None, 'Specify out file!' if opt.rhos is None: opt.rhos = '0.,.2,.4,.6,.8,1.' # import geno and subset reader = BedReader(opt.bfile) query = build_geno_query( idx_start=opt.i0, idx_end=opt.i1, chrom=opt.chrom, pos_start=opt.pos_start, pos_end=opt.pos_end) reader.subset_snps(query, inplace=True) # pheno y = import_one_pheno_from_csv( opt.pfile, pheno_id=opt.pheno_id, standardize=True) # import environment E = sp.loadtxt(opt.efile) # import fixed effects if opt.ffile is None: covs = sp.ones((E.shape[0], 1)) else: covs = sp.loadtxt(opt.ffile) # extract rhos rhos = sp.array(opt.rhos.split(','), dtype=float) # run analysis res = run_struct_lmm( reader, y, E, covs=covs, rhos=rhos, batch_size=opt.batch_size, no_interaction_test=opt.no_interaction_test, unique_variants=opt.unique_variants) # export print 'Export to %s' % opt.ofile make_out_dir(opt.ofile) res.to_csv(opt.ofile, index=False)
def Kdiag(self, theta, x1):
    sigma = SP.exp(2 * theta)
    return sigma * SP.ones(x1.shape[0])
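# Usage sketch for Kdiag: theta is the log-amplitude hyperparameter, so the returned
# kernel diagonal is exp(2*theta) repeated for every input point. The `self` argument
# belongs to the covariance class this method is taken from, so the call below is
# only indicated in a comment.
import scipy as SP

theta = SP.log(2.0)        # amplitude sigma_f = 2.0
x1 = SP.zeros((5, 1))      # five dummy input points
# Kdiag(theta, x1) would return SP.exp(2 * theta) * SP.ones(5) == 4.0 * SP.ones(5)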
def fit_starcolumn(size, savepng): import pylab, scipy boxes = [] coords = [] for increment in [0, 0.03]: # ,0.075,0.1]: #1,0.125,0.15,0.175]: #print size a, b, varp = pylab.hist(size, bins=scipy.arange(0 + increment, 2 + increment, 0.06)) #print a, b boxes += list(a) coords += list(b[:-1] + scipy.ones(len(b[:-1])) * (0.03)) tot = scipy.array(boxes).sum() print tot all = zip(coords, boxes) all.sort(sortit_rev) print all sum = 0 max = 0 min = 1000000 foundCenter = False from copy import copy print all, 'all' for x, y in all: print x, y, sum, tot sum += y if float(sum) / tot > 0.05: if y > max and not foundCenter: max = copy(y) max_x = copy(x) print 'max', max if y / max < 0.98 and not foundCenter: center = copy(max_x) print center, 'center' foundCenter = True if foundCenter: print 'min', min, y if min > y: min = copy(y) min_x = copy(x) print y, min if y / float(min) > 1.05: right = copy(min_x) break left = center - 1. * abs(right - center) print center, right, 'center, right' print len(boxes), len(coords) pylab.clf() pylab.scatter(coords, boxes) pylab.xlim(0, 2.5) pylab.xlabel('SIZE (arcsec)') pylab.axvline(x=center, ymin=-10, ymax=10) pylab.axvline(x=left, ymin=-10, ymax=10) pylab.axvline(x=right, ymin=-10, ymax=10) pylab.savefig(savepng) pylab.clf() return left, right
def vl_phow(im, verbose=False, fast=True, sizes=[4, 6, 8, 10], step=2, color='rgb', floatdescriptors=False, magnif=6, windowsize=1.5, contrastthreshold=0.005): opts = Options(verbose, fast, sizes, step, color, floatdescriptors, magnif, windowsize, contrastthreshold) dsiftOpts = DSiftOptions(opts) # make sure image is float, otherwise segfault im = array(im, 'float32') # Extract the features imageSize = shape(im) if im.ndim == 3: if imageSize[2] != 3: # "IndexError: tuple index out of range" if both if's are checked at the same time raise ValueError("Image data in unknown format/shape") if opts.color == 'gray': numChannels = 1 if (im.ndim == 2): im = vl_rgb2gray(im) else: numChannels = 3 if (im.ndim == 2): im = dstack([im, im, im]) if opts.color == 'rgb': pass elif opts.color == 'opponent': # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m # Note that the mean differs from the standard definition of opponent # space and is the regular intesity (for compatibility with # the contrast thresholding). # Note also that the mean is added pack to the other two # components with a small multipliers for monochromatic # regions. mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2] alpha = 0.01 im = dstack([mu, (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu, (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu]) else: raise ValueError('Color option ' + str(opts.color) + ' not recognized') if opts.verbose: print('{0}: color space: {1}'.format('vl_phow', opts.color)) print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1])) print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes)) frames_all = [] descrs_all = [] for size_of_spatial_bins in opts.sizes: # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m # Recall from VL_DSIFT() that the first descriptor for scale SIZE has # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is # similar). It is convenient to align the descriptors at different # scales so that they have the same geometric centers. For the # maximum size we pick XMIN = 1 and we get centers starting from # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES). # In pracrice, the offset must be integer ('bounds'), so the # alignment works properly only if all OPTS.SZES are even or odd. 
off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1 # smooth the image to the appropriate scale based on the size # of the SIFT bins sigma = size_of_spatial_bins / float(opts.magnif) ims = vl_imsmooth(im, sigma) # extract dense SIFT features from all channels frames = [] descrs = [] for k in range(numChannels): size_of_spatial_bins = int(size_of_spatial_bins) # vl_dsift does not accept numpy.int64 or similar f_temp, d_temp = vl_dsift(image=ims[:, :, k], step=dsiftOpts.step, size=size_of_spatial_bins, fast=dsiftOpts.fast, verbose=dsiftOpts.verbose, norm=dsiftOpts.norm,) frames.append(f_temp.T) descrs.append(d_temp.T) frames = array(frames) descrs = array(descrs) d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]] descrs = descrs.reshape(d_new_shape) # remove low contrast descriptors # note that for color descriptors the V component is # thresholded if (opts.color == 'gray') | (opts.color == 'opponent'): contrast = frames[0][2, :] elif opts.color == 'rgb': contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0) else: raise ValueError('Color option ' + str(opts.color) + ' not recognized') descrs = descrs[:, contrast > opts.contrastthreshold] frames = frames[0][:, contrast > opts.contrastthreshold] # save only x,y, and the scale frames_temp = array(frames[0:3, :]) padding = array(size_of_spatial_bins * ones(frames[0].shape)) frames_to_add = vstack([frames_temp, padding]) # print("Shape of frame for each window", frames_to_add.shape) # print("Shape of descriptors for each window", descrs.shape) # print("Sample Frame", frames_to_add[:,:1]) frames_all.append(vstack([frames_temp, padding])) descrs_all.append(array(descrs)) frames_all = hstack(frames_all) # print("length of descriptors ", len(descrs_all)) descrs_all = hstack(descrs_all) # print("Frames Shape", frames_all.shape) # print("Descriptors shape", descrs_all.shape) # print(np.unique(descrs_all, return_counts=True)) return frames_all.T[:,:2], descrs_all.T
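# Hedged illustration of the scale-alignment rule quoted in the comment above: picking
# XMIN so that XMIN + 3/2*SIZE = 1 + 3/2*max(sizes) gives every bin size the same
# first-descriptor center. The sizes below are the defaults used by vl_phow.
from math import floor

sizes = [4, 6, 8, 10]
for size in sizes:
    off = floor(3.0 / 2 * (max(sizes) - size)) + 1
    center = off + 3.0 / 2 * size       # center of the first descriptor at this scale
    print(size, off, center)            # center is 16.0 for every size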
self._allEvaluations = [] tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)] list(map(self._oneEvaluation, tmp)) self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated))) def _learnStep(self): # produce samples self._produceSamples() samples = list(map(self._base2sample, self._population)) #compute utilities utilities = self.shapingFunction(self._currentEvaluations) utilities /= sum(utilities) # make the utilities sum to 1 if self.uniformBaseline: utilities -= 1. / self.batchSize # update center dCenter = dot(utilities, samples) self._center += self.centerLearningRate * self._sigmas * dCenter # update variances covGradient = dot(utilities, [s ** 2 - 1 for s in samples]) dA = 0.5 * self.covLearningRate * covGradient self._sigmas = self._sigmas * exp(dA) if __name__ == "__main__": from pybrain.rl.environments.functions.unimodal import ElliFunction print((SNES(ElliFunction(100), ones(100), verbose=True).learn()))
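# A minimal separable-NES-style step (an illustrative sketch, not PyBrain's SNES class):
# sample in the normalized space, rank-shape utilities with a uniform baseline, then
# update the center and per-dimension sigmas with the same exponential variance step
# as in _learnStep above. The learning rates and batch size are assumptions.
import numpy as np

def snes_step(f, center, sigmas, batch=20, eta_center=1.0, eta_sigma=0.1):
    samples = np.random.randn(batch, len(center))        # base-space samples
    fitness = np.array([f(center + sigmas * s) for s in samples])
    order = np.argsort(fitness)                          # best (lowest) first
    utilities = np.zeros(batch)
    utilities[order] = np.linspace(1.0, 0.0, batch)      # simple shaping function
    utilities /= utilities.sum()
    utilities -= 1.0 / batch                             # uniform baseline
    center = center + eta_center * sigmas * utilities.dot(samples)
    sigmas = sigmas * np.exp(0.5 * eta_sigma * utilities.dot(samples ** 2 - 1))
    return center, sigmas

c, s = np.ones(5), np.ones(5)
for _ in range(300):
    c, s = snes_step(lambda x: np.sum(x ** 2), c, s)
print(c)                                                 # should drift toward zero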
def fit(colors, c1, c2, m, savepng): import pylab, scipy ''' essentially fine resolution binning ''' boxes = [] coords = [] for increment in [0, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175]: a, b, varp = pylab.hist(colors, bins=scipy.arange(-4 + increment, 4 + increment, 0.2)) #print a, b boxes += list(a) coords += list(b[:-1] + scipy.ones(len(b[:-1])) * (0.1)) print len(colors), colors, 'len' tot = scipy.array(boxes).sum() print tot solutions = [] for version in ['reverse']: #:,'forward']: left = -99 center = -99 all = zip(coords, boxes) if version == 'reverse': all.sort(sortit) if version == 'forward': all.sort(sortit_rev) print all pylab.clf() pylab.scatter(coords, boxes) #pylab.show() print 'plotted' sum = 0 max_y = 0 min = 1000000 foundCenter = False from copy import copy print all, 'all' rev = zip(all[:][1], all[:][0]) a = zip(boxes, coords) a.sort() peak = a[-1][1] foundCenter = False for x, y in all: print x, y, sum, tot print max_y, min, foundCenter, peak sum += y #print all[-1][0], all[0][0] if sum > 0: if float( tot ) / sum > 0.05 and y > 100: #True: # (all[-1][0] < all[0][0] and x < peak ) or (all[-1][0] > all[0][0] and x > peak ): # if y > max_y and not foundCenter: max_y = copy(y) max_x = copy(x) print 'max', max_y print y / max_y, (max_y - y) if y / max_y < 0.98 and (max_y - y) > 15 and not foundCenter: center = copy(max_x) print center, 'center', max_y foundCenter = True #center = peak if foundCenter: print 'min', min, y if min > y: min = copy(y) min_x = copy(x) print y, min, x if y / float(min) > 1.04: left = copy(min_x) print peak, left, center, 'FOUND ONE' break if left != -99: if left > center: left = center - max(0.05, abs(center - left)) right = center + max(0.4, 1. * abs(left - center)) print center, left, right, peak print right - peak, peak - left if True: #right - peak > 0 and peak - left > 0: solutions.append([center, left, right]) ''' pick out the narrower solution ''' if len(solutions) > 1: if solutions[0][0] - solutions[0][1] < solutions[1][0] - solutions[1][ 1]: solution = solutions[0] else: solution = solutions[1] else: solution = solutions[0] center, left, right = solution print center, left, right print len(boxes), len(coords) #print boxes, coords pylab.clf() pylab.scatter(coords, boxes) pylab.xlabel(c1 + ' - ' + c2) pylab.axvline(x=center, ymin=-10, ymax=10) pylab.axvline(x=left, ymin=-10, ymax=10) pylab.axvline(x=right, ymin=-10, ymax=10) pylab.savefig(savepng) return left, right
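# Sketch of the "fine resolution binning" trick used in fit() above: overlay several
# histograms whose bin edges are shifted by a fraction of the bin width, which smooths
# the view of the color distribution without shrinking the bins. Values are synthetic.
import numpy as np

def shifted_histograms(values, width=0.2, shifts=(0, 0.05, 0.1, 0.15)):
    coords, counts = [], []
    for s in shifts:
        c, edges = np.histogram(values, bins=np.arange(-4 + s, 4 + s, width))
        counts.extend(c)
        coords.extend(edges[:-1] + width / 2.0)   # bin centers
    return np.array(coords), np.array(counts)

colors = np.random.normal(1.2, 0.3, 2000)         # toy galaxy colors
x, y = shifted_histograms(colors)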
xi = xi(r)

def f_xiSB(r, am3, am2, am1, a0, a1):
    par = [am3, am2, am1, a0, a1]
    model = sp.zeros((len(par), r.size))
    tw = r != 0.
    model[0, tw] = par[0] / r[tw]**3
    model[1, tw] = par[1] / r[tw]**2
    model[2, tw] = par[2] / r[tw]**1
    model[3, tw] = par[3]
    model[4, :] = par[4] * r
    model = sp.array(model)
    return model.sum(axis=0)

w = ((r >= sb1_rmin) & (r < sb1_rmax)) | ((r >= sb2_rmin) & (r < sb2_rmax))
sigma = 0.1 * sp.ones(xi.size)
sigma[(r >= sb1_rmin - 2.) & (r < sb1_rmin + 2.)] = 1.e-6
sigma[(r >= sb2_rmax - 2.) & (r < sb2_rmax + 2.)] = 1.e-6
popt, pcov = curve_fit(f_xiSB, r[w], xi[w], sigma=sigma[w])

model = f_xiSB(r, *popt)
xiSB = xi.copy()
ww = (r >= sb1_rmin) & (r < sb2_rmax)
xiSB[ww] = model[ww]

pkSB = nbodykit.cosmology.correlation.xi_to_pk(r, xiSB, extrap=True)
pkSB = pkSB(k)
pkSB *= pk[-1] / pkSB[-1]

out = fitsio.FITS(args.out, 'rw', clobber=True)
head = [{'name': k, 'value': float(v)} for k, v in cat.items()]
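# Self-contained sketch of the side-band fit above, with toy data in place of the real
# correlation function: fit a smooth broadband model a/r^3 + b/r^2 + c/r + d + e*r
# outside an excluded peak range, then splice the fit back in over that range.
# The r ranges and the toy xi are assumptions.
import numpy as np
from scipy.optimize import curve_fit

def broadband(r, am3, am2, am1, a0, a1):
    return am3 / r**3 + am2 / r**2 + am1 / r + a0 + a1 * r

r = np.linspace(10.0, 200.0, 500)
xi_toy = 1.0 / r**2 + 0.02 * np.exp(-(r - 105.0)**2 / 50.0)   # smooth part + a "peak"
w = (r < 80.0) | (r > 130.0)                                   # side bands only
popt, _ = curve_fit(broadband, r[w], xi_toy[w])
xi_sb = xi_toy.copy()
xi_sb[~w] = broadband(r[~w], *popt)                            # peak replaced by the smooth fit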
def run(): from optparse import OptionParser usage = "usage: python redsequence [options] \n\nIdentifies and fits the red sequence using apparent magnitude and one color.\nOption of identifying star column and only using objects larger.\n" parser = OptionParser(usage) parser.add_option("-c", "--cluster", help="name of cluster (i.e. MACS0717+37)") parser.add_option("-d", "--detectband", help="detection band (i.e. W-J-V)", default='W-J-V') parser.add_option( "--c1", help= "name of first filter in 'galaxy color' (i.e. MAG_APER1-SUBARU-COADD-1-W-J-V)", default='MAG_APER1-SUBARU-COADD-1-W-J-V') parser.add_option( "--c2", help= "name of second filter in 'galaxy color' (i.e. MAG_APER1-SUBARU-COADD-1-W-C-RC)", default='MAG_APER1-SUBARU-COADD-1-W-C-RC') parser.add_option( "-m", '--m', help= "name of filter to be used as 'galaxy magnitude' (default is '--c2')", default=None) parser.add_option("-s", "--starcolumn", help="add to filter out star column", action="store_true", default=False) parser.add_option('--lm', help="limiting magnitude applied to 'galaxy magnitude'", default=False) parser.add_option( '-r', "--center_radius", help= "maximum galaxy radius from cluster center (in arcsec) (default=440)", default=660.) parser.add_option("-l", "--location", help="write output directory", default=None) parser.add_option("-w", "--web", help="instead write to web (Pat's space)", action="store_true", default=False) parser.add_option( "-z", "--z", help= "see what the photometric redshifts are of redsequence galaxies (requires redshift catalog, obviously)", action='store_true', default=False) parser.add_option( "--cat", help= "name of alternate input catalog (if you don't want to use the default photometry catalog)", default=None) parser.add_option("--existingcolor", help="use existing colors of red sequence fit", action="store_true", default=False) parser.add_option("-e", "--existing", help="use existing red sequence fit", action="store_true", default=False) (options, args) = parser.parse_args() if options.m is None: options.m = options.c2 if options.location is not None and options.web: print 'Either specify location or web but not both at once' raise Exception if options.location is None and options.web is False: options.location = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + options.cluster + '/PHOTOMETRY_' + options.detectband + '_iso/' elif options.web: options.location = '/nfs/slac/g/ki/ki04/pkelly/photoz/' + options.cluster + '/CWWSB_capak.list/' if options.location[-1] != '/': options.location = options.location + '/' print options.location import os if options.existingcolor or options.existing: dir = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + options.cluster + '/LENSING_' + options.detectband + '_' + options.detectband + '_aper/good/' dict = {} print 'file', dir + 'redseqfit_2.orig' redseqfit = open(dir + 'redseqfit_2.orig', 'r').readlines() slope = float(redseqfit[1].split('=')[1].split('*')[0]) intercept = float(redseqfit[1][:-1].split('+')[1]) upper_intercept = float(redseqfit[3][:-1].split('+')[1]) lower_intercept = float(redseqfit[4][:-1].split('+')[1]) polycoeffs = [slope, intercept] std = (upper_intercept - intercept) / 1.2 info = open(dir + 'redseq_all.params', 'r').readlines() print info, dir + 'redseq_all.params' for l in info: if len(l.split(':')) > 1: key, value = l[:-1].split(': ') dict[key] = value print dict #options.center_radius = dict['radcut'] def prefix(filt): if filt is 'g' or filt is 'r' or filt is 'u': return 'MAG_APER1-MEGAPRIME-COADD-1-' + filt else: return 'MAG_APER1-SUBARU-COADD-1-' + 
filt dict['slope'] = slope dict['intercept'] = intercept dict['lower_intercept'] = lower_intercept dict['upper_intercept'] = upper_intercept if options.existing: options.m = prefix(dict['xmag']) options.c1 = prefix(dict['greenmag']) options.c2 = prefix(dict['redmag']) options.lm = dict['magcut2'] print 'finished' elif options.existingcolor: options.c1 = prefix(dict['greenmag']) options.c2 = prefix(dict['redmag']) cluster = options.cluster c1 = options.c1 c2 = options.c2 m = options.m if options.z: import pyfits cat = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.APER1.1.CWWSB_capak.list.all.bpz.tab' p = pyfits.open(cat) photoz = p['STDTAB'].data zero_IDs = len(photoz[photoz.field('SeqNr') == 0]) if zero_IDs > 0: print 'Wrong photoz catalog?', cat print str(zero_IDs) + ' many SeqNr=0' raise Exception print cat if options.cat is None: #not hasattr(options,'cat'): input_mags = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.slr.alter.cat' else: input_mags = options.cat import pyfits, os, sys, pylab, do_multiple_photoz, commands, re, math, scipy from copy import copy print 'input magnitude catalog:', input_mags, options.cat, hasattr( options, 'cat') filterlist = do_multiple_photoz.get_filters(input_mags, 'OBJECTS') #print filterlist print input_mags w = pyfits.open(input_mags) mags = w['OBJECTS'].data #print mags.field('Xpos') mask = mags.field(c1) > -90 if options.z: photoz = photoz[mask] mags = mags[mask] mask = mags.field(c2) > -90 if options.z: photoz = photoz[mask] mags = mags[mask] mask = mags.field(m) > -90 if options.z: photoz = photoz[mask] mags = mags[mask] mask = mags.field('Flag') == 0 if options.z: photoz_star = photoz[mask] mags_star = mags[mask] #mask = mags_star.field(c2) < 23 ''' get cluster redshift ''' command = 'grep ' + cluster + ' ' + '/nfs/slac/g/ki/ki05/anja/SUBARU/' + '/clusters.redshifts ' print command cluster_info = commands.getoutput(command) cluster_redshift = float(re.split('\s+', cluster_info)[1]) print cluster_redshift if options.lm: mag_cut = float(options.lm) else: ''' compute faint magnitude cutoff ''' if m[-6:] == 'W-C-RC' or m[-1] == 'r': mag_cut = 21.5 + 2.5 * math.log10((cluster_redshift / 0.19)**2.) if m[-5:] == 'W-J-V' or m[-5:] == 'W-J-B' or m[-1] == 'g': mag_cut = 22. + 2.5 * math.log10((cluster_redshift / 0.19)**2.) if not options.center_radius: ''' compute radial size of cut ''' options.center_radius = 400 / (z / 0.4) options.center_radius = 400 print mag_cut, options.lm if True: #not options.existing: ''' identify star column (optional) ''' if options.starcolumn: savepng = '/nfs/slac/g/ki/ki04/pkelly/photoz/' + cluster + '/seeing.png' left, right = fit_starcolumn( mags_star[mask].field('FLUX_RADIUS') * 0.2, savepng) savepng = options.location + 'column.png' pylab.axvline(x=left, ymin=-10, ymax=100) pylab.axvline(x=right, ymin=-10, ymax=100) pylab.scatter(mags.field('FLUX_RADIUS') * 0.2, mags.field(m), s=0.25) pylab.xlim(0, 2.5) pylab.xlabel('SIZE (arcsec)') pylab.ylabel(m) pylab.savefig(savepng) pylab.clf() mask = mags.field('FLUX_RADIUS') * 0.2 > right if options.z: photoz = photoz[mask] mags = mags[mask] ''' select galaxies near center of field ''' #options.center_radius=240 mask = ((mags.field('Xpos') - 5000. * scipy.ones(len(mags)))**2. + (mags.field('Ypos') - 5000. 
* scipy.ones(len(mags)))** 2.)**0.5 * 0.2 < float(options.center_radius) if options.z: photoz = photoz[mask] mags = mags[mask] print len(mags) if options.z: print len(photoz) from copy import copy mags_mask = copy(mags) x = copy(mags.field(m)) y = copy(mags.field(c1) - mags.field(c2)) print mags.field(c1), mags.field(c2), c1, c2 mask = x < mag_cut print mag_cut #print x, y savedir = options.location os.system('mkdir -p ' + savedir) savepng = options.location + 'redselection.png' print options.center_radius, len(y[mask]) left, right = fit(y[mask], c1, c2, m, savepng) if options.z: mask = photoz.field('NFILT') > 3 reg_mags = mags_mask[mask] reg_photoz = photoz[mask] mask = photoz.field('BPZ_ODDS') > 0.95 reg_mags = mags_mask[mask] reg_photoz = photoz[mask] print len(reg_photoz) print 'making reg' reg = open('all.reg', 'w') reg.write( 'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n' ) for i in range(len(reg_mags.field('Xpos'))): reg.write('circle(' + str(reg_mags.field('Xpos')[i]) + ',' + str(reg_mags.field('Ypos')[i]) + ',' + str(5) + ') # color=red width=2 text={' + str(reg_photoz.field('BPZ_Z_B')[i]) + '}\n') reg.close() print 'finished reg' mask = x < mag_cut if options.z: photoz2 = photoz[mask] mags_mask = mags_mask[mask] x2 = x[mask] y2 = y[mask] #print sorted(x2) print savepng print left, right if not options.existing: mask = y2 > left if options.z: photoz2 = photoz2[mask] mags_mask = mags_mask[mask] x2 = x2[mask] y2 = y2[mask] mask = y2 < right if options.z: photoz2 = photoz2[mask] mags_mask = mags_mask[mask] x2 = x2[mask] y2 = y2[mask] if not options.existing: polycoeffs = scipy.polyfit(x2, y2, 1) print polycoeffs yfit = scipy.polyval(polycoeffs, x2) print x2, yfit if not options.existing: std = scipy.std(abs(yfit - y2)) print std mask = abs(yfit - y2) < std * 2.5 if options.z: photoz3 = photoz2[mask] x3 = x2[mask] y3 = y2[mask] if not options.existing: polycoeffs = scipy.polyfit(x3, y3, 1) print polycoeffs yfit = scipy.polyval(polycoeffs, sorted(x2)) print x2, yfit if not options.existing: std = scipy.std(abs(yfit - y2)) print std std_fac = 1.2 mask = abs(yfit - y2) < std * std_fac if options.z: photoz2 = photoz2[mask] mags_mask = mags_mask[mask] print photoz2.field('SeqNr') print photoz2.field('BPZ_Z_B') fred = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.redseq' f = open(fred, 'w') for id in photoz2.field('SeqNr'): f.write(str(id) + '\n') f.close() reg = open('regseq.reg', 'w') reg.write( 'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n' ) for i in range(len(mags_mask.field('Xpos'))): reg.write('circle(' + str(mags_mask.field('Xpos')[i]) + ',' + str(mags_mask.field('Ypos')[i]) + ',' + str(5) + ') # color=green width=2 text={' + str(photoz2.field('BPZ_Z_B')[i]) + '}\n') reg.close() pylab.clf() savepng = options.location + 'redhistogram.png' savepdf = options.location + 'redhistogram.pdf' if options.z: lower_lim = cluster_redshift - 0.3 if lower_lim < 0: lower_lim = 0.0001 print photoz2.field('BPZ_Z_B') a, b, varp = pylab.hist(photoz2.field('BPZ_Z_B'), bins=scipy.arange(lower_lim, cluster_redshift + 0.3, 0.01), color='red') pylab.axvline(x=cluster_redshift, ymin=0, ymax=100, color='blue', linewidth=3) pylab.xlabel('Redshift') pylab.ylabel('Galaxies') pylab.savefig(savepng) pylab.savefig(savepdf) reg = open('reg.reg', 'w') reg.write( 'global color=green 
font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n' ) for i in range(len(mags_mask.field('Xpos'))): reg.write('circle(' + str(mags_mask.field('Xpos')[i]) + ',' + str(mags_mask.field('Ypos')[i]) + ',' + str(5) + ') # color=blue width=2 text={' + str(photoz2.field('BPZ_Z_B')[i]) + '}\n') reg.close() pylab.clf() pylab.plot(sorted(x2), yfit, 'b-') pylab.plot(sorted(x2), yfit + scipy.ones(len(yfit)) * std * std_fac, 'b-') pylab.plot(sorted(x2), yfit - scipy.ones(len(yfit)) * std * std_fac, 'b-') pylab.scatter(x, y, color='red', s=0.5) pylab.axhline(y=left, xmin=-10, xmax=100) pylab.axvline(x=mag_cut, ymin=-10, ymax=10) pylab.axhline(y=right, xmin=-10, xmax=100) pylab.xlabel(m) pylab.ylabel(c1 + ' - ' + c2) if options.z: mask = abs(photoz.field('BPZ_Z_B') - cluster_redshift) < 0.04 mags = mags[mask] photoz = photoz[mask] mask = photoz.field('NFILT') > 4 mags = mags[mask] photoz = photoz[mask] print 'priormag' print photoz.field('priormag') print 'nfilt' print photoz.field('NFILT') import pylab x = mags.field(m) y = mags.field(c1) - mags.field(c2) pylab.scatter(x, y, s=0.5) reg = open('reg.reg', 'w') reg.write( 'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n' ) for i in range(len(mags.field('Xpos'))): reg.write('circle(' + str(mags.field('Xpos')[i]) + ',' + str(mags.field('Ypos')[i]) + ',' + str(5) + ') # color=red width=2 text={' + str(photoz.field('BPZ_Z_B')[i]) + '}\n') reg.close() pylab.xlim(sorted(x)[0], sorted(x)[-2]) span = (sorted(y)[-2] - sorted(y)[2]) / 2 if span > 1: span = 1 median = scipy.median(scipy.array(y)) pylab.ylim(median - 2, median + 2) savepng = options.location + 'cmd.png' pylab.savefig(savepng) pylab.clf() pylab.scatter(mags.field('Xpos'), mags.field('Ypos'), s=0.02) pylab.xlim([0, 10000]) pylab.ylim([0, 10000]) pylab.xlabel('X Pixel') pylab.ylabel('Y Pixel') savepng = options.location + '/positions.png' print savepng pylab.savefig(savepng) s = "\nBest fit: y = " + str(polycoeffs[0]) + "*x +" + str( polycoeffs[1]) + '\n' s += "\nCut: y < " + str( polycoeffs[0]) + "*x +" + str(polycoeffs[1] + std_fac * std) + '\n' s += "Cut: y > " + str( polycoeffs[0]) + "*x +" + str(polycoeffs[1] - std_fac * std) + '\n' s += "x < " + str(mag_cut) + '\n' s += 'x = ' + m + '\n' s += 'y = ' + c1 + ' - ' + c2 + '\n' print s f = open(options.location + '/redseqfit', 'w') f.write(s) f.close() from datetime import datetime t2 = datetime.now() print options.location f = open(options.location + '/redsequence.html', 'w') f.write( '<html><tr><td>' + t2.strftime("%Y-%m-%d %H:%M:%S") + '</td></tr><tr><td><h2>Photometric Redshifts of the Red Sequence</h2></td></tr><tr><td><img src="redhistogram.png"></img></td></tr><tr><td><img src="seeing.png"></img></td></tr><<tr><td><img src="column.png"></img></td></tr><tr><td><img src="redselection.png"></img></td></tr><tr><td><img src="cmd.png"></img></td></tr><tr><td><img src="positions.png"></img></td></tr><tr><td>' + s.replace('\n', '<br>') + '</td></tr> </html>') print 'Wrote output to:', options.location print 'Best fit parameters in:', options.location + '/redseqfit'
def fitLMM(self, K=None, tech_noise=None, idx=None, i0=None, i1=None, verbose=False): """ Args: K: list of random effects to be considered in the analysis if K is none, it does not consider any random effect idx: indices of the genes to be considered in the analysis i0: gene index from which the anlysis starts i1: gene index to which the analysis stops verbose: if True, print progresses Returns: pv: matrix of pvalues beta: matrix of correlations info: dictionary annotates pv and beta rows and columns, containing gene_idx_row: index of the genes in rows conv: boolean vetor marking genes for which variance decomposition has converged gene_row: annotate rows of matrices """ assert self.var is not None, 'scLVM:: when multiple hidden factors are considered, varianceDecomposition decomposition must be used prior to this method' # print QTL if idx is None: if i0 is None or i1 is None: i0 = 0 i1 = self.G idx = SP.arange(i0, i1) elif not isinstance(idx, SP.ndarray): idx = SP.array([idx]) if K is not None and not isinstance(K, list): K = [K] lmm_params = { 'covs': SP.ones([self.N, 1]), 'NumIntervalsDeltaAlt': 100, 'NumIntervalsDelta0': 100, 'searchDelta': True } Ystd = self.Y - self.Y.mean(0) Ystd /= self.Y.std(0) beta = SP.zeros((idx.shape[0], self.G)) pv = SP.zeros((idx.shape[0], self.G)) geneID = SP.zeros(idx.shape[0], dtype=str) count = 0 var = self.var / self.var.sum(1)[:, SP.newaxis] for ids in idx: if verbose: print('.. fitting gene %d' % ids) # extract a single gene if K is not None: if len(K) > 1: if self.var_info['conv'][count] == True: _K = SP.sum( [var[count, i] * K[i] for i in range(len(K))], 0) _K /= _K.diagonal().mean() else: _K = None else: _K = K[0] else: _K = None lm = QTL.test_lmm(Ystd, Ystd[:, ids:ids + 1], K=_K, verbose=False, **lmm_params) pv[count, :] = lm.getPv()[0, :] beta[count, :] = lm.getBetaSNP()[0, :] if self.geneID is not None: geneID[count] = self.geneID[ids] count += 1 info = {'conv': self.var_info['conv'], 'gene_idx_row': idx} if geneID is not None: info['gene_row'] = geneID return pv, beta, info
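# Hedged sketch of how the per-gene covariance is assembled in fitLMM above: weight
# each random-effect matrix by that gene's variance component and rescale so the
# diagonal averages one. The limix call (QTL.test_lmm) is not reproduced here, and the
# toy matrices are assumptions.
import numpy as np

def combined_kinship(Ks, weights):
    K = sum(w * K_i for w, K_i in zip(weights, Ks))
    return K / K.diagonal().mean()

N = 50
K_hidden = np.eye(N)                       # e.g. a hidden-factor covariance
X = np.random.randn(N, 5)
K_geno = X.dot(X.T) / 5.0                  # e.g. a genetic relatedness matrix
K = combined_kinship([K_hidden, K_geno], weights=[0.3, 0.7])
print(K.diagonal().mean())                 # ~1 by construction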
def __init__(self, config): """A model of the spectrometer instrument, including spectral response and noise covariance matrices. Noise is typically calculated from a parametric model, fit for the specific instrument. It is a function of the radiance level.""" # If needed, skip first index column and/or convert to nanometers self.wavelength_file = config['wavelength_file'] q = s.loadtxt(self.wavelength_file) if q.shape[1] > 2: q = q[:, 1:] if q[0, 0] < 100: q = q * 1000.0 self.nchans = q.shape[0] self.wl = q[:, 0] self.fwhm = q[:, 1] self.bounds, self.scale, self.statevec = [], [], [] # noise specified as parametric model. if 'SNR' in config: self.model_type = 'SNR' self.snr = float(config['SNR']) else: self.noise_file = config['noise_file'] if self.noise_file.endswith('.txt'): # parametric version self.model_type = 'parametric' coeffs = s.loadtxt(self.noise_file, delimiter=' ', comments='#') p_a = interp1d(coeffs[:, 0], coeffs[:, 1], fill_value='extrapolate') p_b = interp1d(coeffs[:, 0], coeffs[:, 2], fill_value='extrapolate') p_c = interp1d(coeffs[:, 0], coeffs[:, 3], fill_value='extrapolate') self.noise = s.array([[p_a(w), p_b(w), p_c(w)] for w in self.wl]) elif self.noise_file.endswith('.mat'): self.model_type = 'pushbroom' D = loadmat(self.noise_file) nb = len(self.wl) self.ncols = D['columns'][0, 0] if nb != s.sqrt(D['bands'][0, 0]): raise ValueError( 'Noise model does not match wavelength # bands') cshape = ((self.ncols, nb, nb)) self.covs = D['covariances'].reshape(cshape) self.integrations = config['integrations'] # Variables not retrieved self.bvec = ['Cal_Relative_%04i' % int(w) for w in self.wl] if 'unknowns' in config: bval = [] for key, val in config['unknowns'].items(): if type(val) is str: u = s.loadtxt(val, comments='#') if (len(u.shape) > 0 and u.shape[1] > 1): u = u[:, 1] else: u = s.ones(len(self.wl)) * val bval.append(u) # unretrieved uncertainties combine via Root Sum Square... self.bval = s.sqrt(pow(s.array(bval), 2).sum(axis=0)) else: # no unknowns - measurement noise only self.bval = s.zeros(len(self.wl))
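# Sketch of the coefficient interpolation used for the parametric noise model above:
# noise coefficients tabulated at reference wavelengths are interpolated (with
# extrapolation) onto the instrument wavelength grid. The reference table below is
# made up for illustration.
import numpy as np
from scipy.interpolate import interp1d

ref_wl = np.array([400.0, 1000.0, 2500.0])          # nm
coeff_a = np.array([0.010, 0.008, 0.012])           # one of the three noise coefficients
p_a = interp1d(ref_wl, coeff_a, fill_value='extrapolate')

instrument_wl = np.linspace(380.0, 2510.0, 425)
a_per_channel = p_a(instrument_wl)                  # coefficient evaluated per channel
print(a_per_channel.shape)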
def hessian(self, params, epsf, relativeScale=True, stepSizeCutoff=None, jacobian=None, verbose=False): """ Returns the hessian of the model. epsf: Sets the stepsize to try relativeScale: If True, step i is of size p[i] * eps, otherwise it is eps stepSizeCutoff: The minimum stepsize to take jacobian: If the jacobian is passed, it will be used to estimate the step size to take. vebose: If True, a message will be printed with each hessian element calculated """ nOv = len(params) if stepSizeCutoff is None: stepSizeCutoff = scipy.sqrt(_double_epsilon_) params = scipy.asarray(params) if relativeScale: eps = epsf * abs(params) else: eps = epsf * scipy.ones(len(params), scipy.float_) # Make sure we don't take steps smaller than stepSizeCutoff eps = scipy.maximum(eps, stepSizeCutoff) if jacobian is not None: # Turn off the relative scaling since that would overwrite all this relativeScale = False jacobian = scipy.asarray(jacobian) if len(jacobian.shape) == 0: resDict = self.resDict(params) new_jacobian = scipy.zeros(len(params), scipy.float_) for key, value in resDict.items(): new_jacobian += 2.0 * value * scipy.array(jacobian[0][key]) jacobian = new_jacobian elif len(jacobian.shape) == 2: # Need to sum up the total jacobian residuals = scipy.asarray(self.res(params)) # Changed by rng7. I'm not sure what is meant by "sum up the # total jacobian". The following line failed due to shape # mismatch. From the context below, it seems that the dot # product is appropriate. #jacobian = 2.0*residuals*jacobian jacobian = 2.0 * scipy.dot(residuals, jacobian) # If parameters are independent, then # epsilon should be (sqrt(2)*J[i])^-1 factor = 1.0 / scipy.sqrt(2) for i in range(nOv): if jacobian[i] == 0.0: eps[i] = 0.5 * abs(params[i]) else: # larger than stepSizeCutoff, but not more than # half of the original parameter value eps[i] = min( max(factor / abs(jacobian[i]), stepSizeCutoff), 0.5 * abs(params[i])) ## compute cost at f(x) f0 = self.cost(params) hess = scipy.zeros((nOv, nOv), scipy.float_) ## compute all (numParams*(numParams + 1))/2 unique hessian elements for i in range(nOv): for j in range(i, nOv): hess[i][j] = self.hessian_elem(self.cost, f0, params, i, j, eps[i], eps[j], relativeScale, stepSizeCutoff, verbose) hess[j][i] = hess[i][j] return hess
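# Illustrative finite-difference Hessian of a plain scalar function, mirroring the
# element-by-element loop above (only the upper triangle is computed, then mirrored).
# The step size and the test function are assumptions.
import numpy as np

def numerical_hessian(f, x, eps=1e-4):
    n = len(x)
    H = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            ei, ej = np.zeros(n), np.zeros(n)
            ei[i], ej[j] = eps, eps
            H[i, j] = (f(x + ei + ej) - f(x + ei) - f(x + ej) + f(x)) / eps ** 2
            H[j, i] = H[i, j]                      # the Hessian is symmetric
    return H

print(numerical_hessian(lambda v: v[0] ** 2 + 3 * v[0] * v[1], np.array([1.0, 2.0])))
# expected roughly [[2, 3], [3, 0]]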
Kallperm = sp.dot(Kallperm, Kallperm.T)
Kallperm /= Kallperm.diagonal().mean()
Kallperm += 1e-4 * sp.eye(Kallperm.shape[0])
vcperm = VarianceDecomposition(Y)
vcperm.addFixedEffect()
vcperm.addRandomEffect(K=Kallperm)
vcperm.addRandomEffect(is_noise=True)
vcperm.optimize()
permlm0 = vcnull.getLML() - vcperm.getLML()
perm_file.write("\t".join(map(str, [permlm0, permlm1])) + "\n")

## get trans PCs
S_R, U_R = sp.linalg.eigh(Kc)
F1 = U_R[:, ::-1][:, :10]
# add an intercept term
F1 = sp.concatenate([F1, sp.ones((F1.shape[0], 1))], 1)

test = "lrt"  # specify type of statistical test
lmm0 = qtl.test_lmm(snps=Msnps, pheno=Y, K=Kallstd, covs=F1, test=test)
pvalues = lmm0.getPv()      # 1xS vector of p-values (S=X.shape[1])
betas = lmm0.getBetaSNP()   # 1xS vector of effect sizes (S=X.shape[1])
ses = lmm0.beta_ste         # 1xS vector of effect size standard errors (S=X.shape[1])

RV = Mpos
RV["pvaluesCisPCs"] = pvalues.T
RV["betasCisPCs"] = betas.T
RV["sesCisPCs"] = ses.T
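# Minimal numpy-only sketch of the covariate construction above: take the leading
# eigenvectors of a (cis) kinship matrix as fixed-effect covariates and append an
# intercept column. The toy kinship is an assumption.
import numpy as np

N = 100
G_cis = np.random.randn(N, 20)
Kc_toy = G_cis.dot(G_cis.T) / 20.0

S, U = np.linalg.eigh(Kc_toy)                     # eigenvalues in ascending order
F1 = U[:, ::-1][:, :10]                           # top 10 principal components
F1 = np.concatenate([F1, np.ones((N, 1))], 1)     # add an intercept term
print(F1.shape)                                   # (N, 11)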
def add_reads_from_bam(blocks, filenames, types, filter=None, var_aware=False, primary_only=False, no_mm=False, unstranded=True, mm_tag='NM', cram_ref=None): # blocks coordinates are assumed to be in closed intervals #if filter is None: # filter = dict() # filter['intron'] = 20000 # filter['exon_len'] = 8 # filter['mismatch']= 1 if not types: print('add_reads_from_bam: nothing to do') return verbose = False pair = False pair = ('pair_coverage' in types) clipped = False if type(blocks).__module__ != 'numpy': blocks = sp.array([blocks]) for b in range(blocks.shape[0]): introns_p = None introns_m = None if verbose and b % 10 == 0: print('\radd_exon_track_from_bam: %i(%i)' % (b, blocks.shape[0])) block_len = int(blocks[b].stop - blocks[b].start) ## get data from bam if 'exon_track' in types: (introns_p, introns_m, coverage) = get_all_data(blocks[b], filenames, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) if 'mapped_exon_track' in types: (introns_p, introns_m, mapped_coverage) = get_all_data(blocks[b], filenames, spliced=False, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) if 'spliced_exon_track' in types: (introns_p, introns_m, spliced_coverage) = get_all_data(blocks[b], filenames, mapped=False, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) if 'polya_signal_track' in types: (introns_p, introns_m, polya_signals) = get_all_data_uncollapsed(blocks[b], filenames, filter=filter, clipped=True, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) if 'end_signal_track' in types: (introns_p, introns_m, read_end_signals) = get_all_data_uncollapsed(blocks[b], filenames, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) if 'intron_list' in types or 'intron_track' in types: if introns_p is None: (introns_p, introns_m, spliced_coverage) = get_all_data(blocks[b], filenames, mapped=False, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) if not introns_p is None: introns_p = sort_rows(introns_p) if not introns_m is None: introns_m = sort_rows(introns_m) # add requested data to block tracks = sp.zeros((0, block_len)) intron_list = [] for ttype in types: ## add exon track to block ############################################################################## if ttype == 'exon_track': tracks = sp.r_[tracks, coverage] ## add mapped exon track to block ############################################################################## elif ttype == 'mapped_exon_track': tracks = sp.r_[tracks, mapped_coverage] ## add spliced exon track to block ############################################################################## elif ttype == 'spliced_exon_track': tracks = sp.r_[tracks, spliced_coverage] ## add intron coverage track to block ############################################################################## elif ttype == 'intron_track': intron_coverage = sp.zeros((1, block_len)) if introns_p.shape[0] > 0: for k in range(introns_p.shape[0]): from_pos = max(0, introns_p[k, 0]) to_pos = min(block_len, introns_p[k, 1]) intron_coverage[from_pos:to_pos] += introns_p[k, 2] if introns_m.shape[0] > 0: for k in range(introns_m.shape[0]): from_pos = max(0, introns_m[k, 0]) to_pos = min(block_len, introns_m[k, 1]) intron_coverage[from_pos:to_pos] += introns_m[k, 2] tracks = 
sp.r_[tracks, intron_coverage] ## compute intron list ############################################################################## elif ttype == 'intron_list': if introns_p.shape[0] > 0 or introns_m.shape[0] > 0: ### filter introns for location relative to block ### this is legacy behavior for matlab versions! ### TODO - Think about keeping this? Make it a parameter? k_idx = sp.where((introns_p[:, 0] > blocks[0].start) & (introns_p[:, 1] < blocks[0].stop))[0] introns_p = introns_p[k_idx, :] k_idx = sp.where((introns_m[:, 0] > blocks[0].start) & (introns_m[:, 1] < blocks[0].stop))[0] introns_m = introns_m[k_idx, :] if unstranded: introns = sort_rows(sp.r_[introns_p, introns_m]) else: if blocks[0].strand == '-': introns = introns_m else: introns = introns_p if filter is not None and 'mincount' in filter: take_idx = sp.where(introns[:, 2] >= filter['mincount'])[0] if take_idx.shape[0] > 0: intron_list.append(introns[take_idx, :]) else: intron_list.append(sp.zeros((0, 3), dtype='uint32')) else: intron_list.append(introns) else: intron_list.append(sp.zeros((0, 3), dtype='uint32')) ## add polya signal track ############################################################################## elif ttype == 'polya_signal_track': ### get only end positions of reads shp = polya_signals end_idx = shp[0] - 1 - polya_signals[:, ::-1].argmax(axis = 1) polya_signals = scipy.sparse.coo_matrix((sp.ones((shp[1],)), (sp.arange(shp[1]), end_idx)), shape = shp) tracks = sp.r_[tracks, polya_signals.sum(axis = 0)] ## add end signal track ############################################################################## elif ttype == 'end_signal_track': ### get only end positions of reads shp = end_signals end_idx = shp[0] - 1 - end_signals[:, ::-1].argmax(axis = 1) end_signals = scipy.sparse.coo_matrix((sp.ones((shp[1],)), (sp.arange(shp[1]), end_idx)), shape = shp) tracks = sp.r_[tracks, end_signals.sum(axis = 0)] else: print('ERROR: unknown type of data requested: %s' % ttype, file=sys.stderr) if len(types) == 1 and types[0] == 'intron_list': return intron_list elif 'intron_list' in types: return (tracks, intron_list) else: return tracks
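# Small sketch of the intron-coverage accumulation in the 'intron_track' branch above:
# each (start, stop, count) row adds its count to the covered columns of a block-length
# vector. Coordinates here are assumed to be block-relative.
import numpy as np

def intron_coverage(introns, block_len):
    cov = np.zeros(block_len)
    for start, stop, count in introns:
        cov[max(0, int(start)):min(block_len, int(stop))] += count
    return cov

introns = np.array([[10, 40, 3], [30, 80, 1]])
print(intron_coverage(introns, 100)[25:35])       # counts 3 before the overlap, 4 inside it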
def ex9(exclude=sc.array([1, 2, 3, 4]), plotfilename='ex9.png', zoom=False, bovyprintargs={}): """ex9: solve exercise 9 Input: exclude - ID numbers to exclude from the analysis zoom - zoom in Output: plot History: 2009-05-27 - Written - Bovy (NYU) """ #Read the data data = read_data('data_yerr.dat') ndata = len(data) nsample = ndata - len(exclude) nSs = 1001 if zoom: Srange = [900, 1000] else: Srange = [0.001, 1500] Ss = sc.linspace(Srange[0], Srange[1], nSs) chi2s = sc.zeros(nSs) for kk in range(nSs): #Put the dat in the appropriate arrays and matrices Y = sc.zeros(nsample) A = sc.ones((nsample, 2)) C = sc.zeros((nsample, nsample)) yerr = sc.zeros(nsample) jj = 0 for ii in range(ndata): if sc.any(exclude == data[ii][0]): pass else: Y[jj] = data[ii][1][1] A[jj, 1] = data[ii][1][0] C[jj, jj] = Ss[kk] yerr[jj] = data[ii][2] #OMG, such bad code jj = jj + 1 #Now compute the best fit and the uncertainties bestfit = sc.dot(linalg.inv(C), Y.T) bestfit = sc.dot(A.T, bestfit) bestfitvar = sc.dot(linalg.inv(C), A) bestfitvar = sc.dot(A.T, bestfitvar) bestfitvar = linalg.inv(bestfitvar) bestfit = sc.dot(bestfitvar, bestfit) chi2s[kk] = chi2(bestfit, A, Y, C) #Now plot the solution plot.bovy_print(**bovyprintargs) #Plot the best fit line xrange = Srange if zoom: yrange = [nsample - 4, nsample] else: yrange = [nsample - 10, nsample + 8] plot.bovy_plot(Ss, chi2s, 'k-', xrange=xrange, yrange=yrange, xlabel=r'$S$', ylabel=r'$\chi^2$', zorder=1) plot.bovy_plot(sc.array(Srange), sc.array([nsample - 2, nsample - 2]), 'k--', zorder=2, overplot=True) #plot.bovy_plot(sc.array([sc.median(yerr**2.),sc.median(yerr**2.)]), # sc.array(yrange),color='0.75',overplot=True) plot.bovy_plot(sc.array([sc.mean(yerr**2.), sc.mean(yerr**2.)]), sc.array(yrange), color='0.75', overplot=True) plot.bovy_end_print(plotfilename) return 0
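# Stand-alone sketch of the weighted least-squares solve inside the loop above: with
# design matrix A and covariance C, the best fit is (A^T C^-1 A)^-1 A^T C^-1 Y and
# chi^2 is the weighted residual sum of squares. The straight-line data are synthetic.
import numpy as np

def gls_fit(A, Y, C):
    Cinv = np.linalg.inv(C)
    cov = np.linalg.inv(A.T.dot(Cinv).dot(A))      # parameter covariance
    beta = cov.dot(A.T).dot(Cinv).dot(Y)           # best-fit parameters
    resid = Y - A.dot(beta)
    return beta, cov, resid.dot(Cinv).dot(resid)   # chi^2

x = np.linspace(0.0, 10.0, 20)
A = np.vstack([np.ones_like(x), x]).T
Y = 1.0 + 2.0 * x + np.random.normal(0, 0.5, 20)
C = np.diag(0.25 * np.ones(20))
print(gls_fit(A, Y, C)[0])                         # roughly [1, 2]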
import scipy as sp
import matplotlib.pylab as pl

SIZE = 200
MAXTIME = 500
TFSF_POS = 50    # Index of electric field (included) from which total field starts
INTERFACE = 100  # E-field index (inclusive) from where the new medium starts
EPSILON_R = 9
MU_R = 1

ez = sp.zeros(SIZE)
hy = sp.zeros(SIZE)
imp0 = 377.0
snapshots = []

epsR = sp.ones(SIZE)
epsR[INTERFACE:] *= EPSILON_R
muR = sp.ones(SIZE)
muR[INTERFACE:] *= MU_R

for t in range(MAXTIME):
    # TODO: Find out why exactly there is a subtle difference in the incremental
    # electric and magnetic fields. Solution-wise, there is no noticeable
    # difference.
    ezinc = sp.exp(-(t+0.5-(-0.5)-30) * (t+0.5-(-0.5)-30) / 100.0)
    hyinc = sp.exp(-(t-30) * (t-30) / 100.0) / imp0

    # TODO: Find out why the ABCs must be given *before* the corresponding
    # update equation. I'd have thought that it should be done *after*.
    hy[-1] = hy[-2]
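# Complete minimal 1D FDTD loop in the same spirit as the fragment above, with the same
# first-order ABC (copying the neighbouring cell before each update) and an additive
# Gaussian source; the TFSF boundary and the dielectric half-space are left out of
# this sketch.
import numpy as np

SIZE, MAXTIME, imp0 = 200, 250, 377.0
ez, hy = np.zeros(SIZE), np.zeros(SIZE)
for t in range(MAXTIME):
    hy[-1] = hy[-2]                                # ABC for the magnetic field
    hy[:-1] += (ez[1:] - ez[:-1]) / imp0           # update H from E
    ez[0] = ez[1]                                  # ABC for the electric field
    ez[1:] += (hy[1:] - hy[:-1]) * imp0            # update E from H
    ez[50] += np.exp(-(t - 30.0) ** 2 / 100.0)     # additive Gaussian source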
def get_reads(fname, chr_name, start, stop, strand=None, filter=None, mapped=True, spliced=True, var_aware=None, collapse=False, primary_only=False, no_mm=False, mm_tag='NM', cram_ref=None): if not re.search(r'.[bB][aA][mM]$', fname) is None: infile = pysam.AlignmentFile(fname, 'rb') elif not re.search(r'.[cC][rR][aA][mM]$', fname) is None: infile = pysam.AlignmentFile(fname, 'rc', reference_filename=cram_ref, ignore_truncation=True) else: sys.stderr.write('Error: Unknown input alignment format for: %s\n' % fname) ### vectors to build sparse matrix i = [] j = [] read_cnt = 0 introns_p = dict() introns_m = dict() if collapse: read_matrix = sp.zeros((1, stop - start), dtype='int') else: read_matrix = scipy.sparse.coo_matrix((sp.ones(0), ([], [])), shape = (0, stop - start), dtype='bool') length = stop - start #print >> sys.stderr, 'querying %s:%i-%i' % (chr_name, start, stop) ### TODO THIS IS A HACK if chr_name == 'MT': return (read_matrix, sp.zeros(shape=(0, 3), dtype='uint32'), sp.zeros(shape=(0, 3), dtype='uint32')) if infile.gettid(chr_name) > -1: ### pysam query is zero based in position (results are as well), all intervals are pythonic half open for read in infile.fetch(chr_name, start, stop, until_eof=True): ### check if we skip this read if filter_read(read, filter, spliced, mapped, strand, primary_only, var_aware, no_mm, mm_tag=mm_tag): continue tags = dict(read.tags) curr_read_stranded = ('XS' in tags) is_minus = False if curr_read_stranded: is_minus = (tags['XS'] == '-') ### get introns and covergae p = read.pos for o in read.cigar: if o[0] == 3: if is_minus: try: introns_m[(p, p + o[1])] += 1 except KeyError: introns_m[(p, p + o[1])] = 1 else: try: introns_p[(p, p + o[1])] += 1 except KeyError: introns_p[(p, p + o[1])] = 1 if o[0] in [0, 2]: _start = int(max(p-start, 0)) _stop = int(min(p + o[1] - start, stop - start)) if _stop < 0 or _start > length: if o[0] in [0, 2, 3]: p += o[1] continue if collapse: read_matrix[0, _start:_stop] += 1 else: r = sp.arange(_start, _stop) i.extend([read_cnt] * len(r)) j.extend(r) #for pp in range(p, p + o[1]): # if pp - start >= 0 and pp < stop: # i.append(read_cnt) # j.append(pp - start) if o[0] in [0, 2, 3]: p += o[1] ### the follwoing is new behavior and gonne come in the next version --> deletions are not counted towards coverage #### get coverage #for p in read.positions: # if p - start >= 0: # if p >= stop: # break # else: # i.append(read_cnt) # j.append(p - start) read_cnt += 1 ### construct sparse matrix if not collapse: try: i = sp.array(i, dtype='int') j = sp.array(j, dtype='int') read_matrix = scipy.sparse.coo_matrix((sp.ones(i.shape[0]), (i, j)), shape = (read_cnt, stop - start), dtype='bool') except ValueError: step = 1000000 _k = step assert len(i) > _k read_matrix = scipy.sparse.coo_matrix((sp.ones(_k), (i[:_k], j[:_k])), shape = (read_cnt, stop - start), dtype='bool') while _k < len(i): _l = min(len(i), _k + step) read_matrix += scipy.sparse.coo_matrix((sp.ones(_l - _k), (i[_k:_l], j[_k:_l])), shape = (read_cnt, stop - start), dtype='bool') _k = _l ### convert introns into scipy array if len(introns_p) >= 1: introns_p = sp.array([[k[0], k[1], v] for k, v in introns_p.items()], dtype='uint32') introns_p = sort_rows(introns_p) else: introns_p = sp.zeros(shape=(0, 3), dtype='uint32') if len(introns_m) >= 1: introns_m = sp.array([[k[0], k[1], v] for k, v in introns_m.items()], dtype='uint32') introns_m = sort_rows(introns_m) else: introns_m = sp.zeros(shape=(0, 3), dtype='uint32') return (read_matrix, introns_p, introns_m)
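# Sketch of the sparse read-matrix construction above: per-read row indices and
# per-base column indices are collected and assembled into a boolean coo_matrix of
# shape (number of reads, region length). The toy reads are assumptions.
import numpy as np
import scipy.sparse

region_len = 50
reads = [(0, 5, 20), (1, 15, 40)]                  # (read index, start, stop)
rows, cols = [], []
for r, start, stop in reads:
    rows.extend([r] * (stop - start))
    cols.extend(range(start, stop))
read_matrix = scipy.sparse.coo_matrix(
    (np.ones(len(rows)), (rows, cols)),
    shape=(len(reads), region_len), dtype='bool')
print(read_matrix.toarray().sum(axis=0))           # per-base coverage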
def simulate_data(N=200, seed=1234567, views=["0", "1", "2", "3"], D=[500, 200, 500, 200], noise_level=1, K=4, G=1, lscales=[0.2, 0.8, 0.0, 0.0], sample_cov="equidistant", scales=[1, 0.8, 0, 0], shared=True, plot=False): """ Function to simulate test data for MOFA (without ARD or spike-and-slab on factors) N: Number of time points/ samples per group seed: seed to use for simulation views: list of view names K: number of factors G: Number of groups D: list of number of features per view (same length as views) noise_level: variance of the residuals (1/tau); per feature it is multiplied by a uniform random number in [0.5, 1.5] to model differences in features' noise scales, lscales: hyperparameters of the GP per factor (length as given by K) sample_cov: sample_covariates to use (can be of shape N X C) or "equidistant" or None shared: A list or single boolean indicating for each factor whether it is perfectly shared across groups or not. For non-shared ones pairwise group-group correlations are simulated by a Bernoulli distribution. Only relevant for factors with lengthscale and scale > 0. plot: If True, simulation results are plotted """ # simulate some test data np.random.seed(seed) M = len(views) N = int(N) if type(shared) == bool: shared = [shared] if len(shared) == 1: shared = [shared] * K groupidx = np.repeat(range(G), N) # kronecker structure if not sample_cov is None: if sample_cov == "equidistant": sample_cov = np.linspace(0, 1, N) sample_cov = sample_cov.reshape(N, 1) else: assert sample_cov.shape[ 0] == N, "Number of rows of sample_cov and N does not match" if len(np.repeat(np.arange(0, 100, 1), 2).shape) == 1: sample_cov = sample_cov.reshape(N, 1) distC = SS.distance.pdist(sample_cov, 'euclidean')**2. distC = SS.distance.squareform(distC) else: lscales = [0] * K Gmats = [] for k in range(K): if scales[k] == 0 or lscales[k] == 0: # group structure not modelled Gmat = np.eye(G) else: if shared[k]: Gmat = np.ones([G, G]) else: x = np.random.uniform(-1, 1, G) Gmat = np.outer(x, x) + 0.5 * np.eye(G) Gmat = covar_to_corr(Gmat) Gmats.append(Gmat) # simulate Sigma Sigma = [] for k in range(K): if lscales[k] > 0: Kmat = scales[k] * np.exp(-distC / (2 * lscales[k]**2)) Kmat = np.kron(Gmats[k], Kmat) Sigma.append(Kmat + (1 - scales[k]) * np.eye(N * G)) elif lscales[k] == 0: Kmat = scales[k] * (distC == 0).astype(float) Kmat = np.kron(Gmats[k], Kmat) Sigma.append(Kmat + (1 - scales[k]) * np.eye(N * G)) # Sigma.append(np.eye(N*G)) else: sys.exit("All lengthscales need to be non-negative") # plot covariance structure if plot: fig, axs = plt.subplots(1, K, sharex=True, sharey=True) for k in range(K): sns.heatmap(Sigma[k], ax=axs[k]) # simulate factor values Zks = [] for k in range(K): sig = Sigma[k] Zks.append(np.random.multivariate_normal(np.zeros(N * G), sig, 1)) Zks = np.vstack(Zks).transpose() Z = [] for g in range(G): Z.append(Zks[groupidx == g, ]) # simulate alpha and theta, each factor should be active in at least one view inactive = 1000 active = 1 theta = 0.5 * np.ones([M, K]) alpha_tmp = [s.ones(M) * inactive] * K for k in range(K): while s.all(alpha_tmp[k] == inactive): alpha_tmp[k] = s.random.choice([active, inactive], size=M, replace=True) alpha = [s.array(alpha_tmp)[:, m] for m in range(M)] # simulate weights W = [] for m in range(M): W.append( np.column_stack([ np.random.normal(0, np.sqrt(1 / alpha[m][k]), D[m]) * np.random.binomial(1, theta[m][k], D[m]) for k in range(K) ])) # simulate heteroscedastic noise noise = [] for m in range(M): tau_m = stats.uniform.rvs( loc=0.5, scale=1, 
size=D[m] ) * 1 / noise_level # uniform between 0.5 and 1.5 scaled by noise level noise.append( np.random.multivariate_normal(np.zeros(D[m]), np.eye(D[m]) * 1 / tau_m, N)) # generate data data = [] for m in range(M): tmp = [] for g in range(G): tmp.append(Z[g].dot(W[m].transpose()) + noise[m]) data.append(tmp) # store as list of groups if not sample_cov is None: sample_cov = [sample_cov] * G return { 'data': data, 'W': W, 'Z': Z, 'noise': noise, 'sample_cov': sample_cov, 'Sigma': Sigma, 'views': views, 'lscales': lscales, 'N': N, 'Gmats': Gmats }
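# Hedged sketch of the per-factor covariance built in simulate_data: a squared-
# exponential kernel on the sample covariate, scaled by `scale`, a Kronecker product
# with a group-group matrix, and (1 - scale) white noise. The sizes and hyperparameters
# below are illustrative.
import numpy as np
from scipy.spatial import distance

N, G, scale, lscale = 20, 2, 0.8, 0.2
t = np.linspace(0, 1, N).reshape(N, 1)
D2 = distance.squareform(distance.pdist(t, 'euclidean') ** 2)
Kmat = scale * np.exp(-D2 / (2 * lscale ** 2))     # smooth kernel over time points
Gmat = np.ones((G, G))                             # a perfectly shared factor
Sigma = np.kron(Gmat, Kmat) + (1 - scale) * np.eye(N * G)
z = np.random.multivariate_normal(np.zeros(N * G), Sigma)   # one simulated factor
print(z.shape)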
def get_LDpred_ld_tables(snps, ld_radius=100, ld_window_size=0, h2=None, n_training=None, gm=None, gm_ld_radius=None): """ Calculates LD tables, and the LD score in one go... """ ld_dict = {} m, n = snps.shape print m, n ld_scores = sp.ones(m) ret_dict = {} if gm_ld_radius is None: for snp_i, snp in enumerate(snps): # Calculate D start_i = max(0, snp_i - ld_radius) stop_i = min(m, snp_i + ld_radius + 1) X = snps[start_i: stop_i] D_i = sp.dot(snp, X.T) / n r2s = D_i ** 2 ld_dict[snp_i] = D_i lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32') ld_scores[snp_i] = lds_i else: assert gm is not None, 'Genetic map is missing.' window_sizes = [] ld_boundaries = [] for snp_i, snp in enumerate(snps): curr_cm = gm[snp_i] # Now find lower boundary start_i = snp_i min_cm = gm[snp_i] while start_i > 0 and min_cm > curr_cm - gm_ld_radius: start_i = start_i - 1 min_cm = gm[start_i] # Now find the upper boundary stop_i = snp_i max_cm = gm[snp_i] while stop_i > 0 and max_cm < curr_cm + gm_ld_radius: stop_i = stop_i + 1 max_cm = gm[stop_i] ld_boundaries.append([start_i, stop_i]) curr_ws = stop_i - start_i window_sizes.append(curr_ws) assert curr_ws > 0, 'Some issues with the genetic map' X = snps[start_i: stop_i] D_i = sp.dot(snp, X.T) / n r2s = D_i ** 2 ld_dict[snp_i] = D_i lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32') ld_scores[snp_i] = lds_i avg_window_size = sp.mean(window_sizes) print 'Average # of SNPs in LD window was %0.2f' % avg_window_size if ld_window_size == 0: ld_window_size = avg_window_size * 2 ret_dict['ld_boundaries'] = ld_boundaries ret_dict['ld_dict'] = ld_dict ret_dict['ld_scores'] = ld_scores if ld_window_size > 0: ref_ld_matrices = [] inf_shrink_matrices = [] for wi in range(0, m, ld_window_size): start_i = wi stop_i = min(m, wi + ld_window_size) curr_window_size = stop_i - start_i X = snps[start_i: stop_i] D = sp.dot(X, X.T) / n ref_ld_matrices.append(D) if h2 != None and n_training != None: A = ((m / h2) * sp.eye(curr_window_size) + (n_training / (1)) * D) A_inv = linalg.pinv(A) inf_shrink_matrices.append(A_inv) ret_dict['ref_ld_matrices'] = ref_ld_matrices if h2 != None and n_training != None: ret_dict['inf_shrink_matrices'] = inf_shrink_matrices return ret_dict
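# Minimal sketch of the windowed LD-score calculation above: correlate each SNP with
# its neighbours in a +/- radius window, square, and apply the small-sample adjustment
# r^2 - (1 - r^2) / (n - 2). SNPs are assumed standardized, with rows as SNPs.
import numpy as np

def ld_scores(snps, radius=100):
    m, n = snps.shape
    scores = np.ones(m)
    for i in range(m):
        lo, hi = max(0, i - radius), min(m, i + radius + 1)
        r2 = (snps[lo:hi].dot(snps[i]) / n) ** 2
        scores[i] = np.sum(r2 - (1 - r2) / (n - 2))
    return scores

snps = np.random.randn(500, 200)
snps = (snps - snps.mean(1, keepdims=True)) / snps.std(1, keepdims=True)
print(ld_scores(snps)[:5])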
def flood(im, regions=None, mode='max'): r""" Floods/fills each region in an image with a single value based on the specific values in that region. The ``mode`` argument is used to determine how the value is calculated. Parameters ---------- im : array_like An ND image with isolated regions containing 0's elsewhere. regions : array_like An array the same shape as ``im`` with each region labeled. If None is supplied (default) then ``scipy.ndimage.label`` is used with its default arguments. mode : string Specifies how to determine which value should be used to flood each region. Options are: *'max'* : Floods each region with the local maximum in that region *'min'* : Floods each region the local minimum in that region *'size'* : Floods each region with the size of that region Returns ------- An ND-array the same size as ``im`` with new values placed in each forground voxel based on the ``mode``. See Also -------- props_to_image """ mask = im > 0 if regions is None: labels, N = spim.label(mask) else: labels = sp.copy(regions) N = labels.max() I = im.flatten() L = labels.flatten() if mode.startswith('max'): V = sp.zeros(shape=N + 1, dtype=float) for i in range(len(L)): if V[L[i]] < I[i]: V[L[i]] = I[i] elif mode.startswith('min'): V = sp.ones(shape=N + 1, dtype=float) * sp.inf for i in range(len(L)): if V[L[i]] > I[i]: V[L[i]] = I[i] elif mode.startswith('size'): V = sp.zeros(shape=N + 1, dtype=int) for i in range(len(L)): V[L[i]] += 1 im_flooded = sp.reshape(V[labels], newshape=im.shape) im_flooded = im_flooded * mask return im_flooded
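# A vectorized alternative sketch for the 'max' mode of flood() above, using
# scipy.ndimage.maximum over labelled regions instead of the explicit Python loop
# (same result, usually faster on large images). The toy image is random.
import numpy as np
import scipy.ndimage as spim

im = np.random.rand(50, 50) * (np.random.rand(50, 50) > 0.6)
mask = im > 0
labels, N = spim.label(mask)
region_max = spim.maximum(im, labels=labels, index=np.arange(N + 1))
im_flooded = np.asarray(region_max)[labels] * mask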
def hierarchical_kmeans_w_mlc( feat_mat, mlc_mats: list, use_freq, max_leaf_size=100, imbalanced_ratio=0.0, imbalanced_depth=100, spherical=True, seed=0, max_iter=20, threads=-1, ): """ Parameters ---------- feat_mat mlc_mats: list list of must link constraint matrix use_freq max_leaf_size imbalanced_ratio imbalanced_depth spherical seed max_iter threads Returns ------- """ global run_kmeans def run_kmeans(cluster, c1, c2, min_size, max_iter, spherical=True): if point_freq_global is None: indexer = kmeans(feat_mat_global[cluster], None, c1, c2, min_size, max_iter, spherical) else: indexer = kmeans( feat_mat_global[cluster], point_freq_global[cluster], c1, c2, min_size, max_iter, spherical, ) return cluster[indexer], cluster[~indexer] global kmeans def kmeans(feat_mat, freqs, c1=-1, c2=-1, min_size=50, max_iter=20, spherical=True): if c1 == -1: c1, c2 = sp.random.randint(feat_mat.shape[0]), sp.random.randint( 1, feat_mat.shape[0]) c1, c2 = feat_mat[c1], feat_mat[(c1 + c2) % feat_mat.shape[0]] old_indexer = sp.ones(feat_mat.shape[0]) * -1 for _ in range(max_iter): scores = sp.squeeze(sp.asarray(feat_mat.multiply(c1 - c2).sum(1))) if freqs is None: indexer = get_split_wo_freq(scores=scores, min_size=min_size) else: indexer = get_split_w_freq(scores=scores, min_size=min_size, freqs=freqs) if sp.array_equal(indexer, old_indexer): break old_indexer = indexer c1 = feat_mat[indexer].sum(0) c2 = feat_mat[~indexer].sum(0) if spherical: c1 = normalize(c1) c2 = normalize(c2) return indexer global feat_mat_global, point_freq_global feat_mat_global = feat_mat point_freq_global = None random = sp.random.RandomState(seed) cluster_chain = [] clusters_big, clusters_small = [], [] if feat_mat.shape[0] > max_leaf_size: clusters_big.append(sp.arange(feat_mat.shape[0])) else: clusters_small.append(sp.arange(feat_mat.shape[0])) while ( len(clusters_big) > 0 ): # Iterate until there is at least one cluster with > max_leaf_size nodes curr_level = len(cluster_chain) # Do balanced clustering beyond imbalanced_depth to ensure reasonably timely termination if curr_level >= imbalanced_depth: imbalanced_ratio = 0 # Enact Must-link constraints by creating connected components based on must-link constraints if curr_level >= len(mlc_mats): """If there are no must-link constraints for this level onward, then append an identity matrix which says that the trivial thing that every point must link to itself!""" n = feat_mat.shape[0] mlc_mats.append( smat.csr_matrix(smat.diags(np.ones((n)), shape=(n, n)))) clusters_big_cc = [] feat_mat_cc = [] cum_idx_cc = 0 old_cc_to_new_cc = np.zeros((mlc_mats[curr_level].shape[1])) - 1 new_cc_to_old_cc = np.zeros((mlc_mats[curr_level].shape[1])) - 1 num_points_per_cc = [] for cluster in clusters_big: # Get constraints mat and features mat rows for this cluster local_feat_mat = feat_mat[cluster] local_mlc_mat = mlc_mats[curr_level][cluster] # Find # non zero cols in local_mlc_mat. That'll be # conn components(= num_CC) over points in cluster num_points = len(cluster) non_zero_cols = np.diff(local_mlc_mat.tocsc().indptr).nonzero()[0] num_CC = non_zero_cols.shape[0] # Retain only non-zero cols in local_mlc_mat. Now it should be of shape num_points x num_CC local_mlc_mat = local_mlc_mat[:, non_zero_cols] local_num_points_per_cc = np.array( np.sum(local_mlc_mat.ceil(), axis=0, dtype=int)).reshape(-1) # Get feature vec for each conn component using points in that conn comp. 
# (# conn comp x # points) x (# points x # features) --> ( # conn comp x # features ) local_feat_mat_w_mlc = local_mlc_mat.transpose() * local_feat_mat feat_mat_cc.append(local_feat_mat_w_mlc) num_points_per_cc.append(local_num_points_per_cc) assert local_mlc_mat.shape == (num_points, num_CC) assert local_feat_mat.shape == (num_points, feat_mat.shape[1]) assert local_feat_mat_w_mlc.shape == (num_CC, feat_mat.shape[1]) """ Assert that each cols sums to one, and sum of total matrix is equal to num_CC. This is important for correctness when getting conn comp vector using point vectors. """ assert (np.round(np.sum(local_mlc_mat, axis=0)) == np.ones( (1, num_CC))).all() assert int(np.round(np.sum(local_mlc_mat))) == num_CC """ Give indices to each conn comp, offsetting it using cum_idx_cc which keeps track of # conn comp so far, and add this list to cluster_big_cc """ cc_idxs = np.arange(num_CC) + cum_idx_cc clusters_big_cc.append(cc_idxs) old_cc_to_new_cc[non_zero_cols] = cc_idxs new_cc_to_old_cc[cc_idxs] = non_zero_cols cum_idx_cc += num_CC feat_mat_global_cc = smat.csr_matrix(smat.vstack(feat_mat_cc)) if use_freq: point_freq_global = np.concatenate(num_points_per_cc).reshape(-1) assert point_freq_global.shape == (feat_mat_global_cc.shape[0], ) clusters_big = clusters_big_cc feat_mat_global = feat_mat_global_cc LOGGER.info("Shape of new global feat matrix = {}".format( feat_mat_global.shape)) num_parent_clusters = len(clusters_big) + len(clusters_small) new_clusters_big = [] new_clusters_small = [] cols_big, cols_small = [], [ x + len(clusters_big) for x in range(len(clusters_small)) ] seeds = [(random.randint(s), random.randint(1, s)) for s in map(len, clusters_big)] min_sizes = [ int(s * (0.5 - imbalanced_ratio)) for s in map(len, clusters_big) ] with mp.Pool(threads if threads > 0 else mp.cpu_count()) as p: for col, child_clusters in enumerate( p.starmap( run_kmeans, zip( clusters_big, *map(list, zip(*seeds)), min_sizes, repeat(max_iter), repeat(spherical), ), )): for cluster_cc in child_clusters: """cluster is a list of connected component indices. Convert this list to list of indices of points in these connected components""" # Map new conn_comp indices to old conn_comp indices cluster_cc = new_cc_to_old_cc[cluster_cc] # Get mlc matrix with only cols restricted to current list of conn components local_mlc_mat = mlc_mats[curr_level][:, cluster_cc] assert local_mlc_mat.shape == (feat_mat.shape[0], len(cluster_cc)) # Get points in these conn components, which have non zero value in their corresponding row cluster = np.diff(local_mlc_mat.indptr).nonzero()[0] if len(cluster) > max_leaf_size and len(cluster_cc) > 1: new_clusters_big.append(cluster) cols_big.append(col) elif len(cluster) > max_leaf_size and len(cluster_cc) == 1: """Add to small clusters, even though this cluster has more than max_leaf_size points because this cluster has just one connected component and thus can not split further due to must-link constraints """ new_clusters_small.append(cluster) cols_small.append(col) elif len(cluster) > max_leaf_size and len(cluster_cc) == 0: # This condition is not possible but still having this for a sanity check raise NotImplementedError elif len(cluster) > 0: new_clusters_small.append(cluster) cols_small.append(col) # else: # Do not raise error when a cluster is empty. 
# raise NotImplementedError cols = cols_big + cols_small clusters_small.extend(new_clusters_small) curr_clust_mat = smat.csc_matrix( (sp.ones(len(cols)), (range(len(cols)), cols)), shape=(len(new_clusters_big + clusters_small), num_parent_clusters), dtype=sp.float32, ) cluster_chain.append(curr_clust_mat) clusters_big = new_clusters_big LOGGER.info("Cluster chain shape at level = {} is {}".format( curr_level, curr_clust_mat.shape)) C = [] for col, cluster in enumerate(chain(clusters_big, clusters_small)): for row in cluster: C.append((row, col)) cluster_mat_cc = smat.csc_matrix( (sp.ones(feat_mat.shape[0]), list(map(list, zip(*C)))), shape=(feat_mat.shape[0], len(clusters_big) + len(clusters_small)), dtype=sp.float32, ) cluster_mat = smat.csc_matrix(mlc_mats[-1] * cluster_mat_cc, dtype=sp.float32) cluster_chain.append(cluster_mat) LOGGER.info("Cluster chain shape at final level is {}".format( cluster_mat.shape)) return cluster_chain
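# Illustrative two-means split in the spirit of the kmeans helper above (without the
# frequency weighting or the must-link bookkeeping): score each row against the
# difference of the two centroids, split at the median for balance, and iterate until
# the assignment stops changing.
import numpy as np

def two_means_split(X, max_iter=20):
    i, j = np.random.choice(X.shape[0], 2, replace=False)
    c1, c2 = X[i], X[j]
    old = None
    for _ in range(max_iter):
        scores = X.dot(c1 - c2)
        indexer = scores > np.median(scores)        # balanced split
        if old is not None and np.array_equal(indexer, old):
            break
        old = indexer
        c1, c2 = X[indexer].mean(0), X[~indexer].mean(0)
    return indexer

X = np.vstack([np.random.randn(50, 8) + 3, np.random.randn(50, 8) - 3])
print(two_means_split(X).sum())                     # roughly 50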
def parallel_compute_ll_matrix(gp, bounds, num_pts, num_proc=None): """Compute matrix of the log likelihood over the parameter space in parallel. Parameters ---------- bounds : 2-tuple or list of 2-tuples with length equal to the number of free parameters Bounds on the range to use for each of the parameters. If a single 2-tuple is given, it will be used for each of the parameters. num_pts : int or list of ints with length equal to the number of free parameters The number of points to use for each parameters. If a single int is given, it will be used for each of the parameters. num_proc : Positive int or None, optional Number of processes to run the parallel computation with. If set to None, ALL available cores are used. Default is None (use all available cores). Returns ------- ll_vals : array The log likelihood for each of the parameter possibilities. param_vals : list of array The parameter values used. """ if num_proc is None: num_proc = multiprocessing.cpu_count() present_free_params = gp.free_params bounds = scipy.atleast_2d(scipy.asarray(bounds, dtype=float)) if bounds.shape[1] != 2: raise ValueError("Argument bounds must have shape (n, 2)!") # If bounds is a single tuple, repeat it for each free parameter: if bounds.shape[0] == 1: bounds = scipy.tile(bounds, (len(present_free_params), 1)) # If num_pts is a single value, use it for all of the parameters: try: iter(num_pts) except TypeError: num_pts = num_pts * scipy.ones(bounds.shape[0], dtype=int) else: num_pts = scipy.asarray(num_pts, dtype=int) if len(num_pts) != len(present_free_params): raise ValueError( "Length of num_pts must match the number of free parameters of kernel!" ) # Form arrays to evaluate parameters over: param_vals = [] for k in xrange(0, len(present_free_params)): param_vals.append( scipy.linspace(bounds[k, 0], bounds[k, 1], num_pts[k])) pv_cases = list() gp_cases = list() num_pts_cases = list() for k in xrange(0, len(param_vals[0])): specific_param_vals = list(param_vals) specific_param_vals[0] = param_vals[0][k] pv_cases.append(specific_param_vals) gp_cases += [copy.deepcopy(gp)] num_pts_cases.append(num_pts) pool = multiprocessing.Pool(processes=num_proc) try: vals = scipy.asarray( pool.map(_compute_ll_matrix_wrapper, zip(gp_cases, pv_cases, num_pts_cases))) finally: pool.close() return (vals, param_vals)
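# Small sketch of the grid-evaluation pattern above: build per-parameter value grids
# and evaluate a toy log-likelihood over the grid with a multiprocessing pool. The
# objective and the grid bounds are assumptions.
import multiprocessing
import numpy as np

def toy_loglike(theta):
    a, b = theta
    return -(a - 1.0) ** 2 - (b + 2.0) ** 2        # peaked at (1, -2)

if __name__ == '__main__':
    a_vals = np.linspace(-3, 3, 25)
    b_vals = np.linspace(-5, 1, 25)
    grid = [(a, b) for a in a_vals for b in b_vals]
    with multiprocessing.Pool() as pool:
        ll = np.array(pool.map(toy_loglike, grid)).reshape(len(a_vals), len(b_vals))
    print(ll.max())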
data_subsample = data.subsample_phenotypes(phenotype_query=phenotype_query, intersection=True)

# get variables we need from data
snps = data_subsample.getGenotypes(impute_missing=True)
phenotypes, sample_idx = data_subsample.getPhenotypes(phenotype_query=phenotype_query, intersection=True)
assert sample_idx.all()
sample_relatedness = data_subsample.getCovariance()
pos = data_subsample.getPos()

# set parameters for the analysis
N, P = phenotypes.shape
covs = None                 # covariates
Acovs = None                # the design matrix for the covariates
Asnps = SP.ones((1, P))     # the design matrix for the SNPs
K1r = sample_relatedness    # the first sample-sample covariance matrix (non-noise)
K2r = SP.eye(N)             # the second sample-sample covariance matrix (noise)
K1c = None                  # the first phenotype-phenotype covariance matrix (non-noise)
K2c = None                  # the second phenotype-phenotype covariance matrix (noise)
covar_type = 'freeform'     # the type of covariance matrix to be estimated for unspecified covariances
searchDelta = False         # specify if delta should be optimized for each SNP
test = "lrt"                # specify type of statistical test

# Running the analysis
# when covs is not set (None), LIMIX considers an intercept (covs=SP.ones((N,1)))
lmm, pvalues = QTL.test_lmm_kronecker(snps, phenotypes.values, covs=covs, Acovs=Acovs, Asnps=Asnps, K1r=K1r, trait_covar_type=covar_type)

# convert P-values to a DataFrame for nice output writing:
pvalues = pd.DataFrame(data=pvalues.T, index=data_subsample.geno_ID, columns=['YJR139C'])
pvalues = pd.concat([pos, pvalues], join="outer", axis=1)