def get_4_squares(parent1, parent2):
    n_folds = 2
    levels1 = np.unique(parent1)
    levels2 = np.unique(parent2)
    N1 = len(levels1)
    N2 = len(levels2)
    r1 = sp.random.permutation(N1)
    r2 = sp.random.permutation(N2)
    Icv1 = sp.floor(((sp.ones((N1))*n_folds)*r1)/N1)
    Icv2 = sp.floor(((sp.ones((N2))*n_folds)*r2)/N2)

    train_parents1 = levels1[Icv1 != 0]
    train_parents2 = levels2[Icv2 != 0]
    test_parents1 = levels1[Icv1 == 0]
    test_parents2 = levels2[Icv2 == 0]

    train_ind1 = np.array([e in train_parents1 for e in parent1], dtype=bool)
    train_ind2 = np.array([e in train_parents2 for e in parent2], dtype=bool)
    test_ind1 = np.array([e in test_parents1 for e in parent1], dtype=bool)
    test_ind2 = np.array([e in test_parents2 for e in parent2], dtype=bool)

    Itest = test_ind1 & test_ind2
    
    Itrain_distant = train_ind1 & train_ind2
    Itrain_close1 = (train_ind1 & test_ind2)
    Itrain_close2 = (train_ind2 & test_ind1)
    Itrain_close = select_subset(Itrain_close1 | Itrain_close2, Itest.sum())

    return Itest, Itrain_distant, Itrain_close1, Itrain_close2, Itrain_close
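A minimal, self-contained sketch of the parent-level fold split used above (illustration only, not part of the original example; numpy is used in place of the scipy aliases, and select_subset is omitted):

import numpy as np

parent1 = np.array([0, 0, 1, 1, 2, 2])          # parent id per sample
levels1 = np.unique(parent1)
rng = np.random.default_rng(0)
Icv1 = np.floor(2 * rng.permutation(len(levels1)) / len(levels1))   # fold id per parent level

test_parents1 = levels1[Icv1 == 0]
test_ind1 = np.isin(parent1, test_parents1)      # samples whose parent is held out
train_ind1 = ~test_ind1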
Example #2
 def __init__(self, renderer=True, realtime=True, ip="127.0.0.1", port="21560"):
     # initialize base class
     GraphicalEnvironment.__init__(self)
     self.actLen=12
     self.mySensors=sensors.Sensors(["EdgesReal"])
     self.dists=array([20.0, sqrt(2.0)*20, sqrt(3.0)*20])
     self.gravVect=array([0.0,-100.0,0.0])
     self.centerOfGrav=zeros((1,3),float)
     self.pos=ones((8,3),float)
     self.vel=zeros((8,3),float)
     self.SpringM = ones((8,8),float)
     self.d=60.0
     self.dt=0.02
     self.startHight=10.0
     self.dumping=0.4
     self.fraktMin=0.7
     self.fraktMax=1.3
     self.minAkt=self.dists[0]*self.fraktMin
     self.maxAkt=self.dists[0]*self.fraktMax
     self.reset()
     self.count=0
     self.setEdges()
     self.act(array([20.0]*12))
     self.euler()
     self.realtime=realtime
     self.step=0
     if renderer:
         self.setRenderInterface(FlexCubeRenderInterface(ip, port))
         self.getRenderInterface().updateData(self.pos, self.centerOfGrav)
Example #3
def buildBitsLUT():
	global lutAsString
	nEntries=256
	contrast=0.5
	gamma=1.0
	ramp = scipy.arange(-1.0,1.0,2.0/nEntries)
	ramp = (ramp*contrast+1.0)/2.0 #get into range 0:1
	ramp = (ramp**gamma) * 2**16
	ramp = ramp.astype(scipy.UnsignedInt16)
	RGB = scipy.ones((1,nEntries*2,3),scipy.UnsignedInt8)
	RGB[:, 0::2, 0] = 1#byteMS(ramp)#R
	RGB[:, 1::2, 0] = 0# byteLS(ramp)
	RGB[:, 0::2, 1] = 1#byteMS(ramp)#G
	RGB[:, 1::2, 1] = 0#byteLS(ramp)
	RGB[:, 0::2, 2] = 1#byteMS(ramp)#B
	RGB[:, 1::2, 2] = 0#byteLS(ramp)

	#prepend the bits++ header (precedes LUT)
	#and create a string version ready for drawing
	head = scipy.ones((1,12,3),scipy.UnsignedInt8)
	head[:,:,0] = [ 36, 63, 8, 211, 3, 112, 56, 34,0,0,0,0]#R
	head[:,:,1] = [ 106, 136, 19, 25, 115, 68, 41, 159,0,0,0,0]#G
	head[:,:,2] = [ 133, 163, 138, 46, 164, 9, 49, 208,0,0,0,0]#B
	head[:,:,0] = [ 0, 63, 8, 211, 3, 112, 56, 34,0,0,0,0]#R
	head[:,:,1] = [ 0, 136, 19, 25, 115, 68, 41, 159,0,0,0,0]#G
	head[:,:,2] = [ 0, 163, 138, 46, 164, 9, 49, 208,0,0,0,0]#B
	#head[:,:,0] = [ 255, 255, 0, 0, 0, 0, 56, 34,0,0,0,0]#R
	#head[:,:,1] = [ 0, 0, 255, 255, 0, 0, 41, 159,0,0,0,0]#G
	#head[:,:,2] = [ 0, 0, 0, 0, 255, 255, 49, 208,0,0,0,0]#B
	asArr = scipy.concatenate((head,RGB),1)
	lutAsString = asArr.tostring()
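The commented-out byteMS/byteLS calls above are where each 16-bit LUT entry would be split into a high and a low byte before being interleaved into even/odd columns; a minimal sketch of that split, assuming a uint16 ramp like the one built above:

import numpy as np

ramp = (np.linspace(0.0, 1.0, 256) * (2 ** 16 - 1)).astype(np.uint16)
msb = (ramp >> 8).astype(np.uint8)    # most significant byte -> even LUT columns
lsb = (ramp & 0xFF).astype(np.uint8)  # least significant byte -> odd LUT columns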
Example #4
 def test_pore2centroid(self):
     temp_coords = self.net['pore.coords']
     self.geo['pore.centroid'] = sp.ones([self.geo.num_pores(), 3])
     vo.pore2centroid(self.net)
     assert sp.sum(self.net['pore.coords'] -
                   sp.ones([self.geo.num_pores(), 3])) == 0.0
     self.net['pore.coords'] = temp_coords
Example #5
    def calculateGradient(self):
        # normalize rewards
        # self.dataset.data['reward'] /= max(ravel(abs(self.dataset.data['reward'])))

        # initialize variables
        R = ones((self.dataset.getNumSequences(), 1), float)
        X = ones((self.dataset.getNumSequences(), self.loglh.getDimension('loglh') + 1), float)

        # collect sufficient statistics
        print self.dataset.getNumSequences()
        for n in range(self.dataset.getNumSequences()):
            _state, _action, reward = self.dataset.getSequence(n)
            seqidx = ravel(self.dataset['sequence_index'])
            if n == self.dataset.getNumSequences() - 1:
                # last sequence until end of dataset
                loglh = self.loglh['loglh'][seqidx[n]:, :]
            else:
                loglh = self.loglh['loglh'][seqidx[n]:seqidx[n + 1], :]

            X[n, :-1] = sum(loglh, 0)
            R[n, 0] = sum(reward, 0)

        # linear regression
        beta = dot(pinv(X), R)
        return beta[:-1]
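The "linear regression" step above estimates the policy gradient by regressing per-episode returns on the per-episode sums of log-likelihood derivatives; a minimal numpy sketch of just that regression (names and sizes are illustrative):

import numpy as np
from numpy.linalg import pinv

n_seq, n_params = 5, 3
X = np.ones((n_seq, n_params + 1))             # last column is the bias term
X[:, :-1] = np.random.randn(n_seq, n_params)   # summed log-likelihood gradients per episode
R = np.random.randn(n_seq, 1)                  # per-episode returns
beta = pinv(X) @ R                             # least-squares fit
gradient = beta[:-1]                           # bias coefficient is discarded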
Example #6
def make_block_border_mask(spots, areas):
    """Returns a mask indicating which pixels lie just outside a block of spots"""
    inside = make_inside_mask(spots)
    outside = make_outside_mask(spots, areas)
    very_near_inside = sp.ndimage.binary_dilation(inside, structure=sp.ones((3,3)), iterations=8)
    near_inside = sp.ndimage.binary_dilation(inside, structure=sp.ones((3,3)), iterations=32)
    return near_inside & ~very_near_inside & outside
Example #7
def kalman_filter(b,
                  V,
                  Phi,
                  y,
                  X,
                  sigma,
                  Sigma,
                  switch = 0,
                  D = None,
                  d = None,
                  G = None,
                  a = None,
                  c = None):
    r"""
    
    .. math::
       :nowrap:

       \begin{eqnarray*}
       \beta_{t|t-1} = \Phi \: \beta_{t-1|t-1}\\
       V_{t|t-1} = \Phi  V_{t-1|t-1} \Phi ^T + \Sigma \\
       e_t = y_t -  X_t \beta_{t|t-1}\\
       K_t =  V_{t|t-1} X_t^T (\sigma + X_t V_{t|t-1} X_t^T)^{-1}\\
       \beta_{t|t} = \beta_{t|t-1} + K_t e_t\\
       V_{t|t} = (I - K_t X_t) V_{t|t-1}\\
       \end{eqnarray*}

    """

    n = scipy.shape(X)[1]
    beta = scipy.empty(scipy.shape(X))
    n = len(b)
    if D is None:
        D = scipy.ones((1, n))
    if d is None:
        d = scipy.matrix(1.)
    if G is None:
        G = scipy.identity(n)
    if a is None:
        a = scipy.zeros((n, 1))
    if c is None:
        c = scipy.ones((n, 1))
#        import code; code.interact(local=locals())
    (b, V) = kalman_predict(b, V, Phi, Sigma)
    for i in xrange(len(X)):
        beta[i] = scipy.array(b).T
        (b, V, e, K) = kalman_upd(b,
                                V,
                                y[i],
                                X[i],
                                sigma,
                                Sigma,
                                switch,
                                D,
                                d,
                                G,
                                a,
                                c)
        (b, V) = kalman_predict(b, V, Phi, Sigma)
    return beta
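A self-contained numpy sketch of one predict/update cycle following the equations in the docstring above (kalman_predict and kalman_upd are not shown in this snippet, so the algebra is written out directly; sizes are illustrative and the switch/constraint arguments are ignored):

import numpy as np

n = 2
beta = np.zeros((n, 1)); V = np.eye(n)            # state mean and covariance
Phi = np.eye(n); Sigma = 0.01 * np.eye(n)         # transition matrix and state noise
X_t = np.array([[1.0, 0.5]]); y_t = np.array([[1.2]]); sigma = 0.1   # one observation

# predict
beta_pred = Phi @ beta
V_pred = Phi @ V @ Phi.T + Sigma
# update
e = y_t - X_t @ beta_pred
K = V_pred @ X_t.T @ np.linalg.inv(sigma + X_t @ V_pred @ X_t.T)
beta_filt = beta_pred + K @ e
V_filt = (np.eye(n) - K @ X_t) @ V_pred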
Example #8
    def learn(self, X, t, tol=0.01, amax=1e10):
        u"""学習"""

        N = X.shape[0]
        a = sp.ones(N+1) # hyperparameter
        b = 1.0
        phi = sp.ones((N, N+1)) # design matrix
        phi[:,1:] = [[self._kernel(xi, xj) for xj in X] for xi in X]

        diff = 1
        while diff >= tol:
            sigma = spla.inv(sp.diag(a) + b * sp.dot(phi.T, phi))
            m = b * sp.dot(sigma, sp.dot(phi.T, t))
            gamma = sp.ones(N+1) - a * sigma.diagonal()
            anew = gamma / (m * m)
            bnew = (N -  gamma.sum()) / sp.square(spla.norm(t - sp.dot(phi, m)))
            anew[anew >= amax] = amax
            adiff, bdiff = anew - a, bnew - b
            diff = (adiff * adiff).sum() + bdiff * bdiff
            a, b = anew, bnew
            print ".",

        self._a = a
        self._b = b
        self._X = X
        self._m = m
        self._sigma = sigma
        self._amax = amax
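One pass of the hyperparameter update loop above, written out with numpy on random data (a minimal sketch of the same algebra, not the class's API; spla corresponds to scipy.linalg in the snippet):

import numpy as np

N = 5
phi = np.random.randn(N, N + 1); t = np.random.randn(N)   # design matrix and targets
a = np.ones(N + 1); b = 1.0                                # hyperparameters

sigma = np.linalg.inv(np.diag(a) + b * phi.T @ phi)        # posterior covariance
m = b * sigma @ phi.T @ t                                  # posterior mean
gamma = 1.0 - a * sigma.diagonal()
a_new = gamma / (m * m)
b_new = (N - gamma.sum()) / np.square(np.linalg.norm(t - phi @ m))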
Example #9
def phenSpecificEffects(snps,pheno1,pheno2,K=None,covs=None,test='lrt'):
    """
    Univariate fixed effects interaction test for phenotype specific SNP effects

    Args:
        snps:   [N x S] SP.array of S SNPs for N individuals (test SNPs)
        pheno1: [N x 1] SP.array of 1 phenotype for N individuals
        pheno2: [N x 1] SP.array of 1 phenotype for N individuals
        K:      [N x N] SP.array of LMM-covariance/kinship coefficients (optional)
                        If not provided, then linear regression analysis is performed
        covs:   [N x D] SP.array of D covariates for N individuals
        test:    'lrt' for likelihood ratio test (default) or 'f' for F-test

    Returns:
        limix LMM object
    """
    N=snps.shape[0]
    if K is None:
        K=SP.eye(N)
    assert (pheno1.shape[1]==pheno2.shape[1]), "Only consider equal number of phenotype dimensions"
    if covs is None:
        covs = SP.ones((N,1))
    assert (pheno1.shape[1]==1 and pheno2.shape[1]==1 and pheno1.shape[0]==N and pheno2.shape[0]==N and K.shape[0]==N and K.shape[1]==N and covs.shape[0]==N), "shapes mismatch"
    Inter = SP.zeros((N*2,1))
    Inter[0:N,0]=1
    Inter0 = SP.ones((N*2,1))
    Yinter=SP.concatenate((pheno1,pheno2),0)
    Xinter = SP.tile(snps,(2,1))
    Covinter = SP.tile(covs,(2,1))
    lm = simple_interaction(snps=Xinter,pheno=Yinter,covs=Covinter,Inter=Inter,Inter0=Inter0,test=test)
    return lm
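A minimal sketch of the stacked design built above: the two phenotypes are concatenated, the SNPs and covariates tiled, and the interaction indicator marks which phenotype each stacked row belongs to (numpy in place of the SP alias):

import numpy as np

N, S = 4, 2
pheno1 = np.random.randn(N, 1); pheno2 = np.random.randn(N, 1)
snps = np.random.randn(N, S)
covs = np.ones((N, 1))

Yinter = np.concatenate((pheno1, pheno2), 0)     # (2N, 1) stacked phenotypes
Xinter = np.tile(snps, (2, 1))                   # (2N, S) SNPs repeated for each phenotype
Covinter = np.tile(covs, (2, 1))                 # (2N, 1) covariates repeated
Inter = np.zeros((2 * N, 1)); Inter[:N, 0] = 1   # 1 for phenotype-1 rows, 0 otherwise
Inter0 = np.ones((2 * N, 1))                     # null model: shared effect across both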
Example #10
def sqcover(A,n):
    edge = sp.sqrt(A) # the length of an edge
    d = edge/n # the distance between two adjacent points
    r = d/2 # the "radius" of each grid cell
    end = edge - r # end point
    base = sp.linspace(r, end, n)
    first_line = sp.transpose(sp.vstack((base, r*sp.ones(n))))
    increment = sp.transpose(sp.vstack((sp.zeros(n), d*sp.ones(n))))
    pts = first_line
    y_diff = increment
    for i in range(n-1):
        pts = sp.vstack((pts, first_line + y_diff))
        y_diff = y_diff + increment
    
    # Color matter
    colors = []
    for p in pts:
        cval = n*p[0] + p[1] # the x-coord has a higher weight
        cval = colormap.Spectral(cval/((n+1)*end)) # normalize by the max value that cval can take.
        colors.append(cval)

    colors = sp.array(colors)

    cover = (pts, r, colors)
    return cover
Example #11
def createLargeSubMatrix():

    # Create a large matrix, but with same amount of 'ones' as the small submatrix

    t1 = time.time()

    m=40000
    n=1000000

    M=sparse.lil_matrix((m,n))

    m=500
    n=20000

    # Populate some of the matrix
    M[0,:]=ones(n)
    M[:,0]=1
    M[(m/2),:]=ones(n)
    M[:,(n/2)]=1
    M[(m-1),:]=ones(n)
    M[:,(n-1)]=1

    t2 = time.time()
    print 'Time used: ',(t2-t1)

    return M
Example #12
 def __init__(self, render=True, realtime=True, ip="127.0.0.1", port="21560"):
     # initialize base class
     self.render = render
     if self.render:
         self.updateDone = True
         self.updateLock = threading.Lock()
         self.server = UDPServer(ip, port)
     self.actLen = 12
     self.mySensors = sensors.Sensors(["EdgesReal"])
     self.dists = array([20.0, sqrt(2.0) * 20, sqrt(3.0) * 20])
     self.gravVect = array([0.0, -100.0, 0.0])
     self.centerOfGrav = zeros((1, 3), float)
     self.pos = ones((8, 3), float)
     self.vel = zeros((8, 3), float)
     self.SpringM = ones((8, 8), float)
     self.d = 60.0
     self.dt = 0.02
     self.startHight = 10.0
     self.dumping = 0.4
     self.fraktMin = 0.7
     self.fraktMax = 1.3
     self.minAkt = self.dists[0] * self.fraktMin
     self.maxAkt = self.dists[0] * self.fraktMax
     self.reset()
     self.count = 0
     self.setEdges()
     self.act(array([20.0] * 12))
     self.euler()
     self.realtime = realtime
     self.step = 0
Example #13
    def _additionalInit(self):
        assert self.numberOfCenters == 1, 'Mixtures of Gaussians not supported yet.'

        xdim = self.numParameters
        self.alphas = ones(self.numberOfCenters) / float(self.numberOfCenters)
        self.mus = []
        self.sigmas = []

        if self.rangemins == None:
            self.rangemins = -ones(xdim)
        if self.rangemaxs == None:
            self.rangemaxs = ones(xdim)
        if self.initCovariances == None:
            if self.diagonalOnly:
                self.initCovariances = ones(xdim)
            else:
                self.initCovariances = eye(xdim)

        for _ in range(self.numberOfCenters):
            self.mus.append(rand(xdim) * (self.rangemaxs - self.rangemins) + self.rangemins)
            self.sigmas.append(dot(eye(xdim), self.initCovariances))

        self.samples = list(range(self.windowSize))
        self.fitnesses = zeros(self.windowSize)
        self.generation = 0
        self.allsamples = []
        self.muevals = []
        self.allmus = []
        self.allsigmas = []
        self.allalphas = []
        self.allUpdateSizes = []
        self.allfitnesses = []
        self.meanShifts = [zeros((self.numParameters)) for _ in range(self.numberOfCenters)]

        self._oneEvaluation(self._initEvaluable)
Example #14
def svm_gradient_batch_fast(X_pred, X_exp, y, X_pred_ids, X_exp_ids, w, C=.0001, sigma=1.):
    # sample Kernel
    rnpred = X_pred_ids#sp.random.randint(low=0,high=len(y),size=n_pred_samples)
    rnexpand = X_exp_ids#sp.random.randint(low=0,high=len(y),size=n_expand_samples)
    #K = GaussKernMini_fast(X_pred.T,X_exp.T,sigma)
    X1 = X_pred.T
    X2 = X_exp.T
    if sp.sparse.issparse(X1):
        G = sp.outer(X1.multiply(X1).sum(axis=0), sp.ones(X2.shape[1]))
    else:
        G = sp.outer((X1 * X1).sum(axis=0), sp.ones(X2.shape[1]))
    if sp.sparse.issparse(X2):
        H = sp.outer(X2.multiply(X2).sum(axis=0), sp.ones(X1.shape[1]))
    else:
        H = sp.outer((X2 * X2).sum(axis=0), sp.ones(X1.shape[1]))
    K = sp.exp(-(G + H.T - 2. * fast_dot(X1.T, X2)) / (2. * sigma ** 2))
    # K = sp.exp(-(G + H.T - 2.*(X1.T.dot(X2)))/(2.*sigma**2))
    if sp.sparse.issparse(X1) | sp.sparse.issparse(X2): K = sp.array(K)

    # compute predictions
    yhat = fast_dot(K,w[rnexpand])
    # compute whether or not prediction is in margin
    inmargin = (yhat * y[rnpred]) <= 1
    # compute gradient
    G = C * w[rnexpand] - fast_dot((y[rnpred] * inmargin), K)
    return G,rnexpand
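The kernel block above exploits the identity ||x − z||² = ||x||² + ||z||² − 2 x·z to build the Gaussian kernel without pairwise loops; a minimal dense numpy sketch of the same construction (fast_dot replaced by a plain matrix product):

import numpy as np

X1 = np.random.randn(3, 5)   # columns are samples, as in the snippet above
X2 = np.random.randn(3, 4)
sigma = 1.0

G = np.outer((X1 * X1).sum(axis=0), np.ones(X2.shape[1]))   # ||x_i||^2, repeated per column of X2
H = np.outer((X2 * X2).sum(axis=0), np.ones(X1.shape[1]))   # ||z_j||^2, repeated per column of X1
K = np.exp(-(G + H.T - 2.0 * X1.T @ X2) / (2.0 * sigma ** 2))   # (5, 4) Gaussian kernel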
Example #15
def estimateBeta(X,Y,K,C=None,addBiasTerm=False,numintervals0=100,ldeltamin0=-5.0,ldeltamax0=5.0):
    """ compute all pvalues
    If numintervalsAlt==0 use EMMA-X trick (keep delta fixed over alternative models)
    """
    n,s=X.shape;
    n_pheno=Y.shape[1];
    S,U=LA.eigh(K);
    UY=SP.dot(U.T,Y);
    UX=SP.dot(U.T,X);
    if (C==None):
        Ucovariate=SP.dot(U.T,SP.ones([n,1]));
    else:
        if (addBiasTerm):
            C_=SP.concatenate((C,SP.ones([n,1])),axis=1)
            Ucovariate=SP.dot(U.T,C_);
        else:
            Ucovariate=SP.dot(U.T,C);
    n_covar=Ucovariate.shape[1];
    beta = SP.empty((n_pheno,s,n_covar+1));
    LL=SP.ones((n_pheno,s))*(-SP.inf);
    ldelta=SP.empty((n_pheno,s));
    sigg2=SP.empty((n_pheno,s));
    pval=SP.ones((n_pheno,s))*(-SP.inf);
    for phen in SP.arange(n_pheno):
        UY_=UY[:,phen];
        ldelta[phen]=optdelta(UY_,Ucovariate,S,ldeltanull=None,numintervals=numintervals0,ldeltamin=ldeltamin0,ldeltamax=ldeltamax0);
        for snp in SP.arange(s):
            UX_=SP.hstack((UX[:,snp:snp+1],Ucovariate));
            nLL_, beta_, sigg2_=nLLeval(ldelta[phen,snp],UY_,UX_,S,MLparams=True);
            beta[phen,snp,:]=beta_;
            sigg2[phen,snp]=sigg2_;
            LL[phen,snp]=-nLL_;
    return beta, ldelta
Example #16
File: mean.py Project: PMBio/mtSet
	def _update_indicator(self,K,L):
		""" update the indicator """
		_update = {'term': self.n_terms*SP.ones((K,L)).T.ravel(),
					'row': SP.kron(SP.arange(K)[:,SP.newaxis],SP.ones((1,L))).T.ravel(),
					'col': SP.kron(SP.ones((K,1)),SP.arange(L)[SP.newaxis,:]).T.ravel()} 
		for key in _update.keys():
			self.indicator[key] = SP.concatenate([self.indicator[key],_update[key]])
Example #17
def do_compare_wedges(file1="stars-82.txt", file2="Stripe82_coadd.csv", stripe=82,
                      mag=0, size=1.0):
    """ Modify if size is not 1.0 """
    one_run = fi.read_data(file1)
    or_l = len(one_run[:,0])
    or_hist = sv.plot_wedge_density(one_run, stripe, q=0.458, r0=19.4,
                                    name="_rho1", mag=mag, plot=0, size=size)
    coadd = fi.read_data(file2)
    ca_l = len(coadd[:,0])
    ca_hist = sv.plot_wedge_density(coadd, stripe, q=0.458, r0=19.4,
                       name="_rho2", mag=mag, plot=0, size=size)
    # Separate into heights
    or_h = or_hist[:,1]
    ca_h = ca_hist[:,1]
    # Divide the first data set by the second
    if len(or_h) < len(ca_h):
        l = len(or_h)
        extra_h = -0.1*sc.ones((len(ca_h)-l))
    else:
        l = len(ca_h)
        extra_h = 0.1*sc.ones((len(or_h)-l))
    diff_h = sc.zeros(l)
    for i in range(l):
        diff_h[i] = ( or_h[i] / ca_h[i] )
    out = sc.zeros((l,3))
    for i in range(l):
        out[i,0], out[i,1] = ca_hist[i,0], diff_h[i]
        out[i,2] = 1.0 #ma.sqrt(or_hist[i,2]*or_hist[i,2] + ca_hist[i,2]*ca_hist[i,2])
    return out
Example #18
def evalgrid1D(f, evalgrid = None, nGrid=10, minval=0.0, maxval = 0.99999, dimF=0):
    '''
    evaluate a function f(x) on all values of a grid.
    --------------------------------------------------------------------------
    Input:
    f(x)    : callable target function
    evalgrid: 1-D array prespecified grid of x-values
    nGrid   : number of x-grid points to evaluate f(x)
    minval  : minimum x-value for optimization of f(x)
    maxval  : maximum x-value for optimization of f(x)
    --------------------------------------------------------------------------
    Output:
    evalgrid    : x-values
    resultgrid  : f(x)-values
    --------------------------------------------------------------------------
    '''
    if evalgrid is None:
        step = (maxval-minval)/(nGrid)
        evalgrid = SP.arange(minval,maxval+step,step)
    if dimF:
        resultgrid = SP.ones((evalgrid.shape[0],dimF))*9999999999999.0
    else:
        resultgrid = SP.ones(evalgrid.shape[0])*9999999999999.0
    for i in xrange(evalgrid.shape[0]):        
        fevalgrid = f(evalgrid[i])

        is_real=False
        try:
            is_real = SP.isreal(fevalgrid).all()
        except:
            is_real = SP.isreal(fevalgrid)
        assert is_real,"function returned imaginary value"

        resultgrid[i] = fevalgrid
    return (evalgrid,resultgrid)
Example #19
 def _generate_masked_mesh(self, cell_mask=None):
     r"""
     Generates the mesh based on the cell mask provided
     """
     #
     if cell_mask is None:
         cell_mask = sp.ones(self.data_map.shape, dtype=bool)
     #
     # initializing arrays
     self._edges = sp.ones(0, dtype=str)
     self._merge_patch_pairs = sp.ones(0, dtype=str)
     self._create_blocks(cell_mask)
     #
     # building face arrays
     mapper = sp.ravel(sp.array(cell_mask, dtype=int))
     mapper[mapper == 1] = sp.arange(sp.count_nonzero(mapper))
     mapper = sp.reshape(mapper, (self.nz, self.nx))
     mapper[~cell_mask] = -sp.iinfo(int).max
     #
     boundary_dict = {
         'bottom':
             {'bottom': mapper[0, :][cell_mask[0, :]]},
         'top':
             {'top': mapper[-1, :][cell_mask[-1, :]]},
         'left':
             {'left': mapper[:, 0][cell_mask[:, 0]]},
         'right':
             {'right': mapper[:, -1][cell_mask[:, -1]]},
         'front':
             {'front': mapper[cell_mask]},
         'back':
             {'back': mapper[cell_mask]},
         'internal':
             {'bottom': [], 'top': [], 'left': [], 'right': []}
     }
     #
     # determining cells linked to a masked cell
     cell_mask = sp.where(~sp.ravel(cell_mask))[0]
     inds = sp.in1d(self._field._cell_interfaces, cell_mask)
     inds = sp.reshape(inds, (len(self._field._cell_interfaces), 2))
     inds = inds[:, 0].astype(int) + inds[:, 1].astype(int)
     inds = (inds == 1)
     links = self._field._cell_interfaces[inds]
     #
     # adjusting order so masked cells are all on links[:, 1]
     swap = sp.in1d(links[:, 0], cell_mask)
     links[swap] = links[swap, ::-1]
     #
     # setting side based on index difference
     sides = sp.ndarray(len(links), dtype='<U6')
     sides[sp.where(links[:, 1] == links[:, 0]-self.nx)[0]] = 'bottom'
     sides[sp.where(links[:, 1] == links[:, 0]+self.nx)[0]] = 'top'
     sides[sp.where(links[:, 1] == links[:, 0]-1)[0]] = 'left'
     sides[sp.where(links[:, 1] == links[:, 0]+1)[0]] = 'right'
     #
     # adding each block to the internal face dictionary
     inds = sp.ravel(mapper)[links[:, 0]]
     for side, block_id in zip(sides, inds):
         boundary_dict['internal'][side].append(block_id)
     self.set_boundary_patches(boundary_dict, reset=True)
Example #20
def alloc_numpy_arrays(number_cells, space_direction, initval=0,
		dtype='f'):
	"""
	"""
	space = [sc.ones((1,1,1), dtype),\
			sc.ones((1,1,1), dtype),\
			sc.ones((1,1,1), dtype)]
	number_cells = tuple(number_cells)
	
	if 'x' in space_direction:
		space[x_axis] = sc.zeros(number_cells, dtype)
	if 'y' in space_direction:
		space[y_axis] = sc.zeros(number_cells, dtype)
	if 'z' in space_direction:
		space[z_axis] = sc.zeros(number_cells, dtype)
		
	if initval != 0:
		if len(number_cells) == 3:
			space[x_axis][:,:,:] = initval
			space[y_axis][:,:,:] = initval
			space[z_axis][:,:,:] = initval
		elif len(number_cells) == 2:
			space[x_axis][:,:] = initval
			space[y_axis][:,:] = initval
			space[z_axis][:,:] = initval
			
	return space
Example #21
    def __init__(self, typ, numOGaus=10, alphaA=0.02, alphaM=0.02, alphaS=0.02):
        self.typ = typ
        self.alphaA = alphaA
        self.alphaM = alphaM
        self.alphaS = alphaS
        self.minSig = 0.000001
        self.numOGaus = numOGaus #Number of Gaussians
        self.rangeMin = -20.0
        self.rangeMax = 20.0
        self.epsilon = (self.rangeMax - self.rangeMin) / (sqrt(2.0) * float(self.numOGaus - 1)) #Initial value of sigmas

        self.propFakt = 1.0 / float(self.numOGaus)
        self.distFakt = 1.0 / float(self.numOGaus - 1)
        self.distRange = self.rangeMax - self.rangeMin

        self.sigma = ones(self.numOGaus)
        self.mue = zeros(self.numOGaus)
        self.alpha = ones(self.numOGaus)
        self.sigma *= self.epsilon
        self.alpha /= float(self.numOGaus)
        self.alpha = self.invSigmo(self.alpha)
        for i in range(self.numOGaus):
            self.mue[i] = self.distRange * float(i) * self.distFakt + self.rangeMin
        self.baseline = 0.0
        self.best = 0.000001
Example #22
File: ves.py Project: HKou/pybrain
    def __init__(self, evaluator, evaluable, **parameters):
        BlackBoxOptimizer.__init__(self, evaluator, evaluable, **parameters)
        
        self.numParams = self.xdim + self.xdim * (self.xdim+1) / 2
                
        if self.momentum != None:
            self.momentumVector = zeros(self.numParams)
        if self.learningRateSigma == None:
            self.learningRateSigma = self.learningRate
        
        if self.rangemins == None:
            self.rangemins = -ones(self.xdim)
        if self.rangemaxs == None:
            self.rangemaxs = ones(self.xdim)
        if self.initCovariances == None:
            if self.diagonalOnly:
                self.initCovariances = ones(self.xdim)
            else:
                self.initCovariances = eye(self.xdim)

        self.x = rand(self.xdim) * (self.rangemaxs-self.rangemins) + self.rangemins
        self.sigma = dot(eye(self.xdim), self.initCovariances)
        self.factorSigma = cholesky(self.sigma)
        
        self.reset()
Example #23
def make_data_twoclass(N=50):
    # generates some toy data
    mu = sp.array([[0,2],[0,-2]]).T
    C = sp.array([[5.,4.],[4.,5.]])
    X = sp.hstack((mvn(mu[:,0],C,N//2).T, mvn(mu[:,1],C,N//2).T))
    Y = sp.hstack((sp.ones((1,N//2)),-sp.ones((1,N//2))))
    return X,Y
Example #24
    def addFixedEffect(self, F=None, A=None, Ftest=None):
        """
        add fixed effect term to the model

        Args:
            F:     sample design matrix for the fixed effect [N,K]
            A:     trait design matrix for the fixed effect (e.g. sp.ones((1,P)) common effect; sp.eye(P) any effect) [L,P]
            Ftest: sample design matrix for test samples [Ntest,K]
        """
        if A is None:
            A = sp.eye(self.P)
        if F is None:
            F = sp.ones((self.N,1))
            if self.Ntest is not None:
                Ftest = sp.ones((self.Ntest,1))

        assert A.shape[1]==self.P, 'VarianceDecomposition:: A has incompatible shape'
        assert F.shape[0]==self.N, 'VarianceDecomposition:: F has incompatible shape'

        if Ftest is not None:
            assert self.Ntest is not None, 'VarianceDecomposition:: specify Ntest for predictions (method VarianceDecomposition::setTestSampleSize)'
            assert Ftest.shape[0]==self.Ntest, 'VarianceDecomposition:: Ftest has incompatible shape'
            assert Ftest.shape[1]==F.shape[1], 'VarianceDecomposition:: Ftest has incompatible shape'

        # add fixed effect
        self.sample_designs.append(F)
        self.sample_test_designs.append(Ftest)
        self.trait_designs.append(A)
 
        self._desync()
Example #25
File: fem.py Project: HKou/pybrain
    def __init__(self, evaluator, evaluable, **parameters):
        BlackBoxOptimizer.__init__(self, evaluator, evaluable, **parameters)
        self.alphas = ones(self.numberOfCenters)/self.numberOfCenters
        self.mus = []
        self.sigmas = []

        self.tau = 1.
        if self.rangemins == None:
            self.rangemins = -ones(self.xdim)
        if self.rangemaxs == None:
            self.rangemaxs = ones(self.xdim)
        if self.initCovariances == None:
            self.initCovariances = eye(self.xdim)
            
        if self.elitist and self.numberOfCenters == 1 and not self.noisyEvaluator:
            # in the elitist case separate evaluations are not necessary.
            # CHECKME: maybe in the noisy case?
            self.evalMus = False
            
        assert not(self.useCauchy and self.numberOfCenters > 1)
            
        for dummy in range(self.numberOfCenters):
            self.mus.append(rand(self.xdim) * (self.rangemaxs-self.rangemins) + self.rangemins)
            self.sigmas.append(dot(eye(self.xdim), self.initCovariances))
        self.reset()
Example #26
def makedata(testpath):
    """ This will make the input data for the test case. The data will have the 
        default set of parameters Ne=1e11 and Te=Ti=2000.
        Inputs
            testpath - Directory that will hold the data.
            
    """
    finalpath = testpath.joinpath('Origparams')
    if not finalpath.exists():
        finalpath.mkdir()
    data=SIMVALUES
    z = sp.linspace(50.,1e3,50)
    nz = len(z)
    params = sp.tile(data[sp.newaxis,sp.newaxis,:,:],(nz,1,1,1))
    coords = sp.column_stack((sp.ones(nz),sp.ones(nz),z))
    species=['O+','e-']
    times = sp.array([[0,1e3]])
    vel = sp.zeros((nz,1,3))
    Icont1 = IonoContainer(coordlist=coords,paramlist=params,times = times,sensor_loc = sp.zeros(3),ver =0,coordvecs =
        ['x','y','z'],paramnames=None,species=species,velocity=vel)
        
    finalfile = finalpath.joinpath('0 stats.h5')
    Icont1.saveh5(str(finalfile))
    # set start temp to 1000 K.
    Icont1.Param_List[:,:,:,1]=1e3
    Icont1.saveh5(str(testpath.joinpath('startfile.h5')))
Example #27
def gensquexpIPdraw(d,lb,ub,sl,su,sfn,sls,cfn):
    #axis = 0 value = sl
    #d dimensional objective +1 for s
    nt=25
    #print sp.hstack([sp.array([[sl]]),lb])
    #print sp.hstack([sp.array([[su]]),ub])
    [X,Y,S,D] = ESutils.gen_dataset(nt,d+1,sp.hstack([sp.array([[sl]]),lb]).flatten(),sp.hstack([sp.array([[su]]),ub]).flatten(),GPdc.SQUEXP,sp.array([1.5]+[sls]+[0.30]*d))
    G = GPdc.GPcore(X,Y,S,D,GPdc.kernel(GPdc.SQUEXP,d+1,sp.array([1.5]+[sls]+[0.30]*d)))
    def obj(x,s,d,override=False):
        x = x.flatten()
        if sfn(x)==0. or override:
            noise = 0.
        else:
            noise = sp.random.normal(scale=sp.sqrt(sfn(x)))
        
        return [G.infer_m(x,[d])[0,0]+noise,cfn(x)]
    def dirwrap(x,y):
        z = obj(sp.array([[sl]+[i for i in x]]),sl,[sp.NaN],override=True)
        return (z,0)
    [xmin0,ymin0,ierror] = DIRECT.solve(dirwrap,lb,ub,user_data=[], algmethod=1, maxf=89000, logfilename='/dev/null')
    lb2 = xmin0-sp.ones(d)*1e-4
    ub2 = xmin0+sp.ones(d)*1e-4
    [xmin,ymin,ierror] = DIRECT.solve(dirwrap,lb2,ub2,user_data=[], algmethod=1, maxf=89000, logfilename='/dev/null')
    #print "RRRRR"+str([xmin0,xmin,ymin0,ymin,xmin0-xmin,ymin0-ymin])
    return [obj,xmin,ymin]
Example #28
def plot_median_errors(RefinementLevels):
        for i in RefinementLevels[0].cases:
            x =[];
            y =[];
            print "Analyzing median error on: ", i ;
            for r in RefinementLevels:                
                x.append(r.LUT.D_dim*r.LUT.P_dim)
                r.get_REL_ERR_SU2(i)
                y.append(r.SU2[i].median_ERR*100)
            
            x = sp.array(x)
            y = sp.array(y)            
            y = y[sp.argsort(x)]
            x = x[sp.argsort(x)]
                                    
            LHM = sp.ones((len(x),2))
            RHS = sp.ones((len(x),1))            
            LHM[:,1] = sp.log10(x)
            RHS[:,0] = sp.log10(y)

            sols = sp.linalg.lstsq(LHM,RHS)
            b = -sols[0][1]
            plt.loglog(x,y, label='%s, %s'%(i,r'$O(\frac{1}{N})^{%s}$'%str(sp.around(b,2))), basex=10, basey=10, \
                       subsy=sp.linspace(10**(-5), 10**(-2),20),\
                       subsx=sp.linspace(10**(2), 10**(5),50))
            
            #for r in RefinementLevels:                
               # x.append(r.LUT.D_dim*r.LUT.P_dim)
              #  r.get_REL_ERR_SciPy(i)
             #   y.append(r.SciPy[i].median_ERR*100)
            #plt.plot(x,y, label='SciPy: %s'%i)
        plt.grid(which='both')
        plt.xlabel('Grid Nodes (N)')
        plt.ylabel('Median relative error [%]')
        return;
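A small self-contained sketch of the log–log least-squares fit used above to estimate the convergence order (numpy's lstsq standing in for sp.linalg.lstsq; the data are synthetic):

import numpy as np

x = np.array([1e2, 1e3, 1e4, 1e5])             # grid nodes N
y = 100.0 / x                                  # synthetic error decaying like 1/N
A = np.ones((len(x), 2)); A[:, 1] = np.log10(x)
coef, *_ = np.linalg.lstsq(A, np.log10(y), rcond=None)
order = -coef[1]                               # ~1.0 here, i.e. error ~ O(1/N)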
Example #29
def lossTraces(fwrap, aclass, dim, maxsteps, storesteps=None, x0=None,
               initNoise=0., minLoss=1e-10, algoparams={}):
    """ Compute a number of loss curves, for the provided settings,
    stored at specific storestep points. """
    if not storesteps:
        storesteps = range(maxsteps + 1)
    
    # initial points, potentially noisy
    if x0 is None:
        x0 = ones(dim) + randn(dim) * initNoise
    
    # tracking progress by callback
    paramtraces = {'index':-1}
    def storer(a):
        lastseen = paramtraces['index']
        for ts in [x for x in storesteps if x > lastseen and x <= a._num_updates]:
            paramtraces[ts] = a.bestParameters.copy()
        paramtraces['index'] = a._num_updates
        
    # initialization    
    algo = aclass(fwrap, x0, callback=storer, **algoparams)
    print algo, fwrap, dim, maxsteps,
    
    # store initial step   
    algo.callback(algo)
    algo.run(maxsteps)

    # process learning curve
    del paramtraces['index']
    paramtraces = array([x for _, x in sorted(paramtraces.items())])
    oloss = mean(fwrap.stochfun.expectedLoss(ones(100) * fwrap.stochfun.optimum))
    ls = abs(fwrap.stochfun.expectedLoss(ravel(paramtraces)) - oloss) + minLoss
    ls = reshape(ls, paramtraces.shape)
    print median(ls[-1])
    return ls
Example #30
 def range_query_geno_local(self, idx_start=None, idx_end=None, chrom=None,pos_start=None, pos_end=None,windowsize=0):
     """
     return an index for a range query on the genotypes
     """
     if idx_start==None and idx_end==None and pos_start==None and pos_end==None and chrom==None:
         return  sp.arange(0,self.num_snps)
     elif idx_start is not None or idx_end is not None:
         if idx_start is None:
             idx_start = 0
         if idx_end is None:
             idx_end = self.num_snps
         res =  sp.arange(idx_start,idx_end)
         return res
     elif chrom is not None:
         res = self.geno_pos["chrom"]==chrom
     elif pos_start is not None or pos_end is not None:
         if pos_start is not None and pos_end is not None:
             assert pos_start[0] == pos_end[0], "chromosomes have to match"
         
         if pos_start is None:
             idx_larger =  sp.ones(self.num_snps,dtype=bool)
         else:
             idx_larger = (self.geno_pos["pos"]>=(pos_start[1]-windowsize)) & (self.geno_pos["chrom"]==pos_start[0])
         if pos_end is None:
             idx_smaller =  sp.ones(self.num_snps,dtype=bool)
         else:
             idx_smaller = (self.geno_pos["pos"]<=(pos_end[1]+windowsize)) & (self.geno_pos["chrom"]==pos_end[0])
         res = idx_smaller & idx_larger
     else:
         raise Exception("This should not be triggered")#res =  sp.ones(self.geno_pos.shape,dtype=bool)
     return  sp.where(res)[0]
Example #31
        print(last + ' Arnorm  =  %12.4e' % (Arnorm, ))
        print(last + msg[istop + 1])

    if istop == 6:
        info = maxiter
    else:
        info = 0

    return (postprocess(x), info)


if __name__ == '__main__':
    from scipy import ones, arange
    from scipy.linalg import norm
    from scipy.sparse import spdiags

    n = 10

    residuals = []

    def cb(x):
        residuals.append(norm(b - A * x))

    #A = poisson((10,),format='csr')
    A = spdiags([arange(1, n + 1, dtype=float)], [0], n, n, format='csr')
    M = spdiags([1.0 / arange(1, n + 1, dtype=float)], [0], n, n, format='csr')
    A.psolve = M.matvec
    b = 0 * ones(A.shape[0])
    x = minres(A, b, tol=1e-12, maxiter=None, callback=cb)
    #x = cg(A,b,x0=b,tol=1e-12,maxiter=None,callback=cb)[0]
Example #32
def cvglmnet(*, x,
             y,
             family = 'gaussian',
             ptype = 'default',
             nfolds = 10,
             foldid = scipy.empty([0]),
             parallel = 1,
             keep = False,
             grouped = True,
             **options):

    options = glmnetSet(options)

    if 0 < len(options['lambdau']) < 2:
        raise ValueError('Need more than one value of lambda for cv.glmnet')
    
    nobs = x.shape[0]

    # we should not really need this. user must supply the right shape
    # if y.shape[0] != nobs:
    #    y = scipy.transpose(y)
        
    # convert 1d python array of size nobs to 2d python array of size nobs x 1
    if len(y.shape) == 1:
        y = scipy.reshape(y, [y.size, 1])

    # we should not really need this. user must supply the right shape       
    # if (len(options['offset']) > 0) and (options['offset'].shape[0] != nobs):
    #    options['offset'] = scipy.transpose(options['offset'])
    
    if len(options['weights']) == 0:
        options['weights'] = scipy.ones([nobs, 1], dtype = scipy.float64)

    # main call to glmnet        
    glmfit = glmnet(x = x, y = y, family = family, **options)    

    is_offset = glmfit['offset']
    options['lambdau'] = glmfit['lambdau']
    
    nz = glmnetPredict(glmfit, scipy.empty([0]), scipy.empty([0]), 'nonzero')
    if glmfit['class'] == 'multnet':        
        nnz = scipy.zeros([len(options['lambdau']), len(nz)])
        for i in range(len(nz)):
            nnz[:, i] = scipy.transpose(scipy.sum(nz[i], axis = 0))
        nz = scipy.ceil(scipy.median(nnz, axis = 1))    
    elif glmfit['class'] == 'mrelnet':
        nz = scipy.transpose(scipy.sum(nz[0], axis = 0))
    else:
        nz = scipy.transpose(scipy.sum(nz, axis = 0))
    
    if len(foldid) == 0:
        ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(nobs/nfolds))])
        mb = scipy.arange(scipy.mod(nobs, nfolds))
        mb = scipy.reshape(mb, [1, mb.size])
        population = scipy.append(ma, mb, axis = 1)
        mc = scipy.random.permutation(len(population))
        mc = mc[0:nobs]
        foldid = population[mc]
        foldid = scipy.reshape(foldid, [foldid.size,])
    else:
        nfolds = scipy.amax(foldid) + 1
        
    if nfolds < 3:
        raise ValueError('nfolds must be at least 3; nfolds = 10 recommended')
        
    cpredmat = list()
    foldid = scipy.reshape(foldid, [foldid.size, ])
    if parallel != 1:
        if parallel == -1:
            num_cores = multiprocessing.cpu_count()
        else:
            num_cores = parallel
        sys.stderr.write("[status]\tParallel glmnet cv with " + str(num_cores) + " cores\n")
        cpredmat = joblib.Parallel(n_jobs=num_cores)(joblib.delayed(doCV)(i, x, y, family, foldid, nfolds, is_offset, **options) for i in range(nfolds))
    else:
        for i in range(nfolds):
            newFit = doCV(i, x, y, family, foldid, nfolds, is_offset, **options)
            cpredmat.append(newFit)
        
    if cpredmat[0]['class'] == 'elnet':
        cvstuff = cvelnet( cpredmat, options['lambdau'], x, y \
                          , options['weights'], options['offset'] \
                          , foldid, ptype, grouped, keep)
    elif cpredmat[0]['class'] == 'lognet':
        cvstuff = cvlognet(cpredmat, options['lambdau'], x, y \
                          , options['weights'], options['offset'] \
                          , foldid, ptype, grouped, keep)
    elif cpredmat[0]['class'] == 'multnet':
        cvstuff = cvmultnet(cpredmat, options['lambdau'], x, y \
                          , options['weights'], options['offset'] \
                          , foldid, ptype, grouped, keep)
    elif cpredmat[0]['class'] == 'mrelnet':
        cvstuff = cvmrelnet(cpredmat, options['lambdau'], x, y \
                          , options['weights'], options['offset'] \
                          , foldid, ptype, grouped, keep)
    elif cpredmat[0]['class'] == 'fishnet':
        cvstuff = cvfishnet(cpredmat, options['lambdau'], x, y \
                           , options['weights'], options['offset'] \
                           , foldid, ptype, grouped, keep)
    elif cpredmat[0]['class'] == 'coxnet':
        raise NotImplementedError('Cross-validation for coxnet not implemented yet.')
        #cvstuff = cvcoxnet(cpredmat, options['lambdau'], x, y \
        #                  , options['weights'], options['offset'] \
        #                  , foldid, ptype, grouped, keep)
 
    cvm = cvstuff['cvm']
    cvsd = cvstuff['cvsd']
    cvname = cvstuff['name']

    CVerr = dict()
    CVerr['lambdau'] = options['lambdau']       
    CVerr['cvm'] = scipy.transpose(cvm)
    CVerr['cvsd'] = scipy.transpose(cvsd)
    CVerr['cvup'] = scipy.transpose(cvm + cvsd)
    CVerr['cvlo'] = scipy.transpose(cvm - cvsd)
    CVerr['nzero'] = nz
    CVerr['name'] = cvname
    CVerr['glmnet_fit'] = glmfit
    if keep:
        CVerr['fit_preval'] = cvstuff['fit_preval']
        CVerr['foldid'] = foldid
    if ptype == 'auc':
        cvm = -cvm
    CVerr['lambda_min'] = scipy.amax(options['lambdau'][cvm <= scipy.amin(cvm)]).reshape([1])  
    idmin = options['lambdau'] == CVerr['lambda_min']
    semin = cvm[idmin] + cvsd[idmin]
    CVerr['lambda_1se'] = scipy.amax(options['lambdau'][cvm <= semin]).reshape([1])
    CVerr['class'] = 'cvglmnet'
    
    return(CVerr)
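A minimal sketch of the fold-id construction above for the case where foldid is not supplied: labels 0..nfolds-1 are tiled to cover all observations and then permuted (numpy in place of the scipy aliases):

import numpy as np

nobs, nfolds = 23, 10
ma = np.tile(np.arange(nfolds), nobs // nfolds)    # full repetitions of 0..nfolds-1
mb = np.arange(nobs % nfolds)                      # labels for the remainder
population = np.concatenate([ma, mb])
foldid = population[np.random.permutation(nobs)]   # one fold label per observation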
Example #33
def _create_feature_vector(pixel_group):
    """
    Generates the feature vector, given a square bunch of pixels.
    
    ``pixel_group`` by itself is actually a list of pixel groups (sub-images).
    Each sub-image will become a different part of the vector
    """
    
    # Initialise some values that we'll use later
    feature_vector = sp.empty(FEATURE_VECTOR_SIZE)
    num_pixels = pixel_group[0].shape[0]
    # Find the angles of each point in degrees
    x = sp.arange(-num_pixels/2, num_pixels/2)
    grid = sp.meshgrid(x, x)
    angle = sp.angle(grid[0] + 1j*grid[1], deg=True)
    
    # Create histogram buckets and find indices of the spectrum array that
    # fall into a particular bucket
    diff = -360/NUM_HISTOGRAM_BUCKETS
    buckets = sp.arange(180, -180+diff, diff)
    indices = {}
    for i in range(0, NUM_HISTOGRAM_BUCKETS):
        indices[i] = sp.where((angle <= buckets[i]) * 
                              (angle > buckets[i+1]) )
    buckets = buckets[:-1]
    
    # Average out the Cb and Cr components and add it to the feature vector
    feature_vector[0] = sp.dot(sp.ones((1, num_pixels)), 
                               pixel_group[0].dot(sp.ones((num_pixels, 1))))
    feature_vector[0] /= num_pixels * num_pixels
    feature_vector[1] = sp.dot(sp.ones((1, num_pixels)), 
                               pixel_group[1].dot(sp.ones((num_pixels, 1))))
    feature_vector[1] /= num_pixels * num_pixels
    
    # The other five elements are the orientation entropies at different scales
    for i in range(2, FEATURE_VECTOR_SIZE):
        # First calculate the centre-shifted fourier transform of the pixel
        # group, and then apply a log transformation to get the magnitude
        # spectrum
        transformed_pixel_group = np.fft.fft2(pixel_group[i])
        centre_shifted_pixel_group = np.fft.fftshift(transformed_pixel_group)
        fourier_spectrum = sp.log(abs(centre_shifted_pixel_group) + 1)
        
        # Calculate the orientation histogram of the log magnitude spectrum
        # by summing over groups of angles. The histogram value at a given
        # angle should give the power in the log magnitude spectrum around that
        # angle (approximately)
        histogram = sp.empty(buckets.shape)
        for j in range(NUM_HISTOGRAM_BUCKETS):
            histogram[j] = fourier_spectrum[indices[j]].sum()
        
        # Finally, calculate the orientation entropy based on the standard
        # statistical formula:
        #       E = -Σ H(θ) * log(H(θ))
        if not histogram.all():
            entropy = 0
        else:
            entropy = - (histogram * sp.log(histogram)).sum()
        if sp.isnan(entropy):
            print histogram
            print fourier_spectrum
            sys.exit(1)
        # The scaling attempts to make the entropy value the same order as the
        # Cb and Cr values. This does not guarantee a range of 0-255 however.
        scaling = (BASE_PIXEL_GROUP_SIZE / num_pixels) ** 2
        feature_vector[i] = entropy * scaling
    
    return feature_vector
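A compact sketch of the orientation-entropy formula used above, E = −Σ H(θ)·log H(θ), applied to a toy histogram (the FFT and angle bucketing are omitted; any zero bucket falls back to zero entropy as in the code):

import numpy as np

histogram = np.array([4.0, 2.0, 1.0, 1.0])   # power per orientation bucket
entropy = 0.0 if not histogram.all() else -(histogram * np.log(histogram)).sum()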
Example #34
from scipy.interpolate import griddata

mesh_x = S.loadtxt("mesh_x.txt")
mesh_y = S.loadtxt("mesh_y.txt")

the_splines = list()
for i in range(mesh_x.shape[0]):
    the_splines.append(ParametricSpline(mesh_x[i], mesh_y[i]))

SAMPLE_NUMBER = 100

ts = S.linspace(0.0, 1.0, SAMPLE_NUMBER)

old_xy = S.vstack([aspline(ts) for aspline in the_splines])
new_xy = S.vstack([
    S.hstack([i * S.ones((SAMPLE_NUMBER, 1)),
              ts.reshape(-1, 1)]) for i in range(len(the_splines))
])

new_xs = griddata(old_xy, new_xy[:, 0], (x, z), method='linear')
new_ys = griddata(old_xy, new_xy[:, 1], (x, z), method='linear')

disp_genes = [
    "kni__3", "D__3", "hbP__3", "bcdP__3", "KrP__3", "gt__3", "eve__3",
    "odd__3", "rho__3", "sna__3"
]
#disp_genes = ["eve__3"]

for one_gene_name in disp_genes:

    colnum = results[0]["column"].index(one_gene_name) - 1
Example #35
    def create_incidence_matrix(self,
                                weights=None,
                                fmt='coo',
                                drop_zeros=False):
        r"""
        Creates a weighted incidence matrix in the desired sparse format

        Parameters
        ----------
        weights : array_like, optional
            An array containing the throat values to enter into the matrix (In
            graph theory these are known as the 'weights').  If omitted, ones
            are used to create a standard incidence matrix representing
            connectivity only.

        fmt : string, optional
            The sparse storage format to return.  Options are:

            **'coo'** : (default) This is the native format of OpenPNM data

            **'lil'** : Enables row-wise slice of the matrix

            **'csr'** : Favored by most linear algebra routines

            **'dok'** : Enables subscript access of locations

        drop_zeros : boolean (default is ``False``)
            If ``True``, applies the ``eliminate_zeros`` method of the sparse
            array to remove all zero locations.

        Returns
        -------
        An incidence matrix in the specified sparse format

        Notes
        -----
        The incidence matrix is a cousin to the adjacency matrix, and used by
        OpenPNM for finding the throats connected to a given pore or set of
        pores.  Specifically, an incidence matrix has Np rows and Nt columns,
        and each row represents a pore, containing non-zero values at the
        locations corresponding to the indices of the throats connected to that
        pore.  The ``weights`` argument indicates what value to place at each
        location, with the default being 1's to simply indicate connections.
        Another useful option is throat indices, such that the data values
        on each row indicate which throats are connected to the pore, though
        this is redundant as it is identical to the locations of non-zeros.

        Examples
        --------
        >>> import openpnm as op
        >>> pn = op.network.Cubic(shape=[5, 5, 5])
        >>> weights = sp.rand(pn.num_throats(), ) < 0.5
        >>> im = pn.create_incidence_matrix(weights=weights, fmt='csr')
        """
        # Check if provided data is valid
        if weights is None:
            weights = sp.ones((self.Nt, ), dtype=int)
        elif sp.shape(weights)[0] != self.Nt:
            raise Exception('Received dataset of incorrect length')

        conn = self['throat.conns']
        row = conn[:, 0]
        row = sp.append(row, conn[:, 1])
        col = sp.arange(self.Nt)
        col = sp.append(col, col)
        weights = sp.append(weights, weights)

        temp = sprs.coo.coo_matrix((weights, (row, col)), (self.Np, self.Nt))

        if drop_zeros:
            temp.eliminate_zeros()

        # Convert to requested format
        if fmt == 'coo':
            pass  # temp is already in coo format
        elif fmt == 'csr':
            temp = temp.tocsr()
        elif fmt == 'lil':
            temp = temp.tolil()
        elif fmt == 'dok':
            temp = temp.todok()

        return temp
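A self-contained sketch of the incidence layout described above (Np rows for pores, Nt columns for throats, one entry per pore–throat connection), built directly with scipy.sparse on a tiny hand-made network:

import numpy as np
from scipy.sparse import coo_matrix

conns = np.array([[0, 1], [1, 2]])                  # throat.conns: each row joins two pores
Np, Nt = 3, len(conns)
row = np.concatenate([conns[:, 0], conns[:, 1]])    # both end-pores of every throat
col = np.tile(np.arange(Nt), 2)                     # the throat index, once per end-pore
im = coo_matrix((np.ones(2 * Nt, dtype=int), (row, col)), shape=(Np, Nt)).tocsr()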
Example #36
    def create_adjacency_matrix(self,
                                weights=None,
                                fmt='coo',
                                triu=False,
                                drop_zeros=False):
        r"""
        Generates a weighted adjacency matrix in the desired sparse format

        Parameters
        ----------
        weights : array_like, optional
            An array containing the throat values to enter into the matrix
            (in graph theory these are known as the 'weights').

            If the array is Nt-long, it implies that the matrix is symmetric,
            so the upper and lower triangular regions are mirror images.  If
            it is 2*Nt-long then it is assumed that the first Nt elements are
            for the upper triangle, and the last Nt elements are for the lower
            triangle.

            If omitted, ones are used to create a standard adjacency matrix
            representing connectivity only.

        fmt : string, optional
            The sparse storage format to return.  Options are:

            **'coo'** : (default) This is the native format of OpenPNM data

            **'lil'** : Enables row-wise slice of the matrix

            **'csr'** : Favored by most linear algebra routines

            **'dok'** : Enables subscript access of locations

        triu : boolean (default is ``False``)
            If ``True``, the returned sparse matrix only contains the upper-
            triangular elements.  This argument is ignored if the ``weights``
            array is 2*Nt-long.

        drop_zeros : boolean (default is ``False``)
            If ``True``, applies the ``eliminate_zeros`` method of the sparse
            array to remove all zero locations.

        Returns
        -------
        An adjacency matrix in the specified Scipy sparse format.

        Notes
        -----
        The adjacency matrix is used by OpenPNM for finding the pores
        connected to a given pore or set of pores.  Specifically, an adjacency
        matrix has Np rows and Np columns.  Each row represents a pore,
        containing non-zero values at the locations corresponding to the
        indices of the pores connected to that pore.  The ``weights`` argument
        indicates what value to place at each location, with the default
        being 1's to simply indicate connections. Another useful option is
        throat indices, such that the data values on each row indicate which
        throats are connected to the pore.

        Examples
        --------
        >>> import openpnm as op
        >>> pn = op.network.Cubic(shape=[5, 5, 5])
        >>> weights = sp.rand(pn.num_throats(), ) < 0.5
        >>> am = pn.create_adjacency_matrix(weights=weights, fmt='csr')

        """
        # Check if provided data is valid
        if weights is None:
            weights = sp.ones((self.Nt, ), dtype=int)
        elif sp.shape(weights)[0] not in [self.Nt, 2 * self.Nt, (self.Nt, 2)]:
            raise Exception('Received weights are of incorrect length')

        # Append row & col to each other, and data to itself
        conn = self['throat.conns']
        row = conn[:, 0]
        col = conn[:, 1]
        if weights.shape == (2 * self.Nt, ):
            row = sp.append(row, conn[:, 1])
            col = sp.append(col, conn[:, 0])
        elif weights.shape == (self.Nt, 2):
            row = sp.append(row, conn[:, 1])
            col = sp.append(col, conn[:, 0])
            weights = weights.flatten(order='F')
        elif not triu:
            row = sp.append(row, conn[:, 1])
            col = sp.append(col, conn[:, 0])
            weights = sp.append(weights, weights)

        # Generate sparse adjacency matrix in 'coo' format
        temp = sprs.coo_matrix((weights, (row, col)), (self.Np, self.Np))

        if drop_zeros:
            temp.eliminate_zeros()

        # Convert to requested format
        if fmt == 'coo':
            pass  # temp is already in coo format
        elif fmt == 'csr':
            temp = temp.tocsr()
        elif fmt == 'lil':
            temp = temp.tolil()
        elif fmt == 'dok':
            temp = temp.todok()

        return temp
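And the corresponding adjacency sketch (Np x Np, symmetric when the weights are Nt-long), again on a tiny hand-made network with scipy.sparse standing in for the sprs alias:

import numpy as np
from scipy.sparse import coo_matrix

conns = np.array([[0, 1], [1, 2]])                 # two throats joining three pores
weights = np.ones(len(conns), dtype=int)

row = np.concatenate([conns[:, 0], conns[:, 1]])   # mirror entries for symmetry
col = np.concatenate([conns[:, 1], conns[:, 0]])
data = np.concatenate([weights, weights])
am = coo_matrix((data, (row, col)), shape=(3, 3)).tocsr()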
Example #37
def calc_risk_scores(bed_file,
                     rs_id_map,
                     phen_map,
                     out_file=None,
                     split_by_chrom=False,
                     adjust_for_sex=False,
                     adjust_for_covariates=False,
                     adjust_for_pcs=False,
                     non_zero_chromosomes=None,
                     only_score=False,
                     verbose=False,
                     summary_dict=None):
    print('Parsing PLINK bed file: %s' % bed_file)

    if split_by_chrom:
        num_individs = len(phen_map)
        assert num_individs > 0, 'No individuals found.  Problems parsing the phenotype file?'
        pval_derived_effects_prs = sp.zeros(num_individs)

        for i in range(1, 23):
            if non_zero_chromosomes is None or i in non_zero_chromosomes:
                genotype_file = bed_file + '_%i_keep' % i
                if os.path.isfile(genotype_file + '.bed'):
                    if verbose:
                        print('Working on chromosome %d' % i)
                    prs_dict = get_prs(genotype_file,
                                       rs_id_map,
                                       phen_map,
                                       only_score=only_score,
                                       verbose=verbose)

                    pval_derived_effects_prs += prs_dict[
                        'pval_derived_effects_prs']
            elif verbose:
                print('Skipping chromosome')

    else:
        prs_dict = get_prs(bed_file,
                           rs_id_map,
                           phen_map,
                           only_score=only_score,
                           verbose=verbose)
        num_individs = len(prs_dict['iids'])
        pval_derived_effects_prs = prs_dict['pval_derived_effects_prs']

    if only_score:
        write_only_scores_file(out_file, prs_dict, pval_derived_effects_prs)
        res_dict = {}
    elif sp.std(prs_dict['true_phens']) == 0:
        print('No variance left to explain in phenotype.')
        res_dict = {'pred_r2': 0}
    else:
        # Report prediction accuracy
        assert len(
            phen_map
        ) > 0, 'No individuals found.  Problems parsing the phenotype file?'

        pval_eff_corr = sp.corrcoef(pval_derived_effects_prs,
                                    prs_dict['true_phens'])[0, 1]
        pval_eff_r2 = pval_eff_corr**2

        res_dict = {'pred_r2': pval_eff_r2}

        pval_derived_effects_prs.shape = (len(pval_derived_effects_prs), 1)
        true_phens = sp.array(prs_dict['true_phens'])
        true_phens.shape = (len(true_phens), 1)

        # Store covariate weights, slope, etc.
        weights_dict = {}

        # Store Adjusted predictions
        adj_pred_dict = {}

        # Direct effect
        Xs = sp.hstack(
            [pval_derived_effects_prs,
             sp.ones((len(true_phens), 1))])
        (betas, rss00, r, s) = linalg.lstsq(sp.ones((len(true_phens), 1)),
                                            true_phens)
        (betas, rss, r, s) = linalg.lstsq(Xs, true_phens)
        pred_r2 = 1 - rss / rss00
        weights_dict['unadjusted'] = {
            'Intercept': betas[1][0],
            'ldpred_prs_effect': betas[0][0]
        }

        if verbose:
            print('PRS correlation: %0.4f' % pval_eff_corr)
        print('Variance explained (Pearson R2) by PRS: %0.4f' % pred_r2)

        # Adjust for sex
        if adjust_for_sex and 'sex' in prs_dict and len(prs_dict['sex']) > 0:
            sex = sp.array(prs_dict['sex'])
            sex.shape = (len(sex), 1)
            (betas, rss0, r,
             s) = linalg.lstsq(sp.hstack([sex,
                                          sp.ones((len(true_phens), 1))]),
                               true_phens)
            Xs = sp.hstack(
                [pval_derived_effects_prs, sex,
                 sp.ones((len(true_phens), 1))])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['sex_adj'] = {
                'Intercept': betas[2][0],
                'ldpred_prs_effect': betas[0][0],
                'sex': betas[1][0]
            }
            if verbose:
                print(
                    'Fitted effects (betas) for PRS, sex, and intercept on true phenotype:',
                    betas)
            adj_pred_dict['sex_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for Sex: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Sex_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + Sex : %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Sex_adj_pred_r2+Sex'] = pred_r2

        # Adjust for PCs
        if adjust_for_pcs and 'pcs' in prs_dict and len(prs_dict['pcs']) > 0:
            pcs = prs_dict['pcs']
            (betas, rss0, r,
             s) = linalg.lstsq(sp.hstack([pcs,
                                          sp.ones((len(true_phens), 1))]),
                               true_phens)
            Xs = sp.hstack(
                [pval_derived_effects_prs,
                 sp.ones((len(true_phens), 1)), pcs])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            weights_dict['pc_adj'] = {
                'Intercept': betas[1][0],
                'ldpred_prs_effect': betas[0][0],
                'pcs': betas[2][0]
            }
            adj_pred_dict['pc_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for PCs: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + PCs: %0.4f (%0.6f)' %
                (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['PC_adj_pred_r2+PC'] = pred_r2

            # Adjust for both PCs and Sex
            if adjust_for_sex and 'sex' in prs_dict and len(
                    prs_dict['sex']) > 0:
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack([sex, pcs,
                               sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, sex,
                    sp.ones((len(true_phens), 1)), pcs
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                weights_dict['sex_pc_adj'] = {
                    'Intercept': betas[2][0],
                    'ldpred_prs_effect': betas[0][0],
                    'sex': betas[1][0],
                    'pcs': betas[3][0]
                }
                adj_pred_dict['sex_pc_adj'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for PCs and Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['PC_Sex_adj_pred_r2+PC_Sex'] = pred_r2

        # Adjust for covariates
        if adjust_for_covariates and 'covariates' in prs_dict and len(
                prs_dict['covariates']) > 0:
            covariates = prs_dict['covariates']
            (betas, rss0, r, s) = linalg.lstsq(
                sp.hstack([covariates,
                           sp.ones((len(true_phens), 1))]), true_phens)
            Xs = sp.hstack([
                pval_derived_effects_prs, covariates,
                sp.ones((len(true_phens), 1))
            ])
            (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
            adj_pred_dict['cov_adj'] = sp.dot(Xs, betas)
            pred_r2 = 1 - rss_pd / rss0
            print(
                'Variance explained (Pearson R2) by PRS adjusted for Covariates: %0.4f (%0.6f)'
                % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_adj_pred_r2'] = pred_r2
            pred_r2 = 1 - rss_pd / rss00
            print(
                'Variance explained (Pearson R2) by PRS + Cov: %0.4f (%0.6f)' %
                (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
            res_dict['Cov_adj_pred_r2+Cov'] = pred_r2

            if adjust_for_pcs and 'pcs' in prs_dict and len(
                    prs_dict['pcs']) and 'sex' in prs_dict and len(
                        prs_dict['sex']) > 0:
                pcs = prs_dict['pcs']
                sex = sp.array(prs_dict['sex'])
                sex.shape = (len(sex), 1)
                (betas, rss0, r, s) = linalg.lstsq(
                    sp.hstack(
                        [covariates, sex, pcs,
                         sp.ones((len(true_phens), 1))]), true_phens)
                Xs = sp.hstack([
                    pval_derived_effects_prs, covariates, sex, pcs,
                    sp.ones((len(true_phens), 1))
                ])
                (betas, rss_pd, r, s) = linalg.lstsq(Xs, true_phens)
                adj_pred_dict['cov_sex_pc_adj'] = sp.dot(Xs, betas)
                pred_r2 = 1 - rss_pd / rss0
                print(
                    'Variance explained (Pearson R2) by PRS adjusted for Cov+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_PC_Sex_adj_pred_r2'] = pred_r2
                pred_r2 = 1 - rss_pd / rss00
                print(
                    'Variance explained (Pearson R2) by PRS+Cov+PCs+Sex: %0.4f (%0.6f)'
                    % (pred_r2, (1 - pred_r2) / sp.sqrt(num_individs)))
                res_dict['Cov_PC_Sex_adj_pred_r2+Cov_PC_Sex'] = pred_r2

        # Now calibration
        y_norm = (true_phens - sp.mean(true_phens)) / sp.std(true_phens)
        denominator = sp.dot(pval_derived_effects_prs.T,
                             pval_derived_effects_prs)
        numerator = sp.dot(pval_derived_effects_prs.T, y_norm)
        regression_slope = (numerator / denominator)[0][0]
        if verbose:
            print('The slope for predictions with weighted effects is: %0.4f' %
                  regression_slope)

        num_individs = len(prs_dict['pval_derived_effects_prs'])

        # Write PRS out to file.
        if out_file is not None:
            write_scores_file(out_file,
                              prs_dict,
                              pval_derived_effects_prs,
                              adj_pred_dict,
                              weights_dict=weights_dict)

    return res_dict
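
# A minimal, self-contained sketch (not part of the original code; all names
# illustrative) of the incremental-R2 pattern used repeatedly above: fit the
# phenotype on the covariates alone, then on covariates plus the PRS, and
# report 1 - RSS(full) / RSS(reduced).
import numpy as np
from scipy import linalg

def incremental_r2(prs, covariates, phenotype):
    """Variance explained by the PRS on top of the covariates."""
    n = len(phenotype)
    intercept = np.ones((n, 1))
    X_reduced = np.hstack([covariates, intercept])
    X_full = np.hstack([prs.reshape(n, 1), covariates, intercept])

    def rss(X):
        beta, _, _, _ = linalg.lstsq(X, phenotype)
        return ((phenotype - X.dot(beta)) ** 2).sum()

    return 1.0 - rss(X_full) / rss(X_reduced)

# e.g. incremental_r2(prs, np.hstack([sex, pcs]), true_phens.ravel())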
Exemplo n.º 38
0
    def from_networkx(cls, G, project=None):
        r"""
        Add data to an OpenPNM Network from an undirected NetworkX graph object.

        Parameters
        ----------
        G : networkx.classes.graph.Graph Object
            The NetworkX graph. G should be undirected. Node labels must be
            integers (ints), zero-based, and contain no gaps, i.e.
            ``G.nodes() = [0,1,3,4,5]`` is not allowed and should be mapped to
            ``G.nodes() = [0,1,2,3,4]``.

        project : OpenPNM Project object
            A GenericNetwork is created and added to the specified Project.
            If no Project is supplied then one will be created and returned.

        Returns
        -------
        An OpenPNM Project containing a GenericNetwork with all the data from
        the NetworkX object.

        """
        net = {}

        # Ensure G is an undirected networkX graph with numerically numbered
        # nodes for which numbering starts at 0 and does not contain any gaps
        if not isinstance(G, nx.Graph):
            raise Exception('Provided object is not a NetworkX graph.')
        if nx.is_directed(G):
            raise Exception('Provided graph is directed. Convert to undirected graph.')
        if not all(isinstance(n, int) for n in G.nodes()):
            raise Exception('Node numbering is not numeric. Convert to int.')
        if min(G.nodes()) != 0:
            raise Exception('Node numbering does not start at zero.')
        if max(G.nodes()) + 1 != len(G.nodes()):
            raise Exception('Node numbering contains gaps. Map nodes to remove gaps.')

        # Parsing node data
        Np = len(G)
        net.update({'pore.all': sp.ones((Np, ), dtype=bool)})
        for n, props in G.nodes(data=True):
            for item in props.keys():
                val = props[item]
                dtype = type(val)
                # Remove prepended pore. and pore_ if present
                for b in ['pore.', 'pore_']:
                    item = item.replace(b, '')
                # Create arrays for subsequent indexing, if not present already
                if 'pore.' + item not in net.keys():
                    if dtype == str:  # handle strings of arbitrary length
                        net['pore.' + item] = sp.ndarray((Np, ),
                                                         dtype='object')
                    elif dtype is list:
                        dtype = type(val[0])
                        if dtype == str:
                            dtype = 'object'
                        cols = len(val)
                        net['pore.' + item] = sp.ndarray((Np, cols),
                                                         dtype=dtype)
                    else:
                        net['pore.' + item] = sp.ndarray((Np, ), dtype=dtype)
                net['pore.' + item][n] = val

        # Parsing edge data
        # Deal with conns explicitly
        try:
            conns = list(G.edges)  # NetworkX V2
        except:
            conns = G.edges()  # NetworkX V1
        conns.sort()

        # Add conns to Network
        Nt = len(conns)
        net.update({'throat.all': sp.ones(Nt, dtype=bool)})
        net.update({'throat.conns': sp.array(conns)})

        # Scan through each edge and extract all its properties
        i = 0
        for t in conns:
            props = G[t[0]][t[1]]
            for item in props:
                val = props[item]
                dtype = type(val)
                # Remove prepended throat. and throat_ if present
                for b in ['throat.', 'throat_']:
                    item = item.replace(b, '')
                # Create arrays for subsequent indexing, if not present already
                if 'throat.' + item not in net.keys():
                    if dtype == str:
                        net['throat.' + item] = sp.ndarray((Nt, ),
                                                           dtype='object')
                    elif dtype is list:
                        dtype = type(val[0])
                        if dtype == str:
                            dtype = 'object'
                        cols = len(val)
                        net['throat.' + item] = sp.ndarray((Nt, cols),
                                                           dtype=dtype)
                    else:
                        net['throat.' + item] = sp.ndarray((Nt, ), dtype=dtype)
                net['throat.' + item][i] = val
            i += 1

        network = GenericNetwork(project=project)
        network = cls._update_network(network=network, net=net)
        return network.project
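
# Illustrative sketch (not from the original project): building a NetworkX
# graph that satisfies the requirements documented above -- undirected,
# integer node labels starting at zero with no gaps -- before handing it to
# from_networkx(). The property names here are arbitrary examples and mirror
# the 'pore.'/'throat.' parsing.
import networkx as nx

G = nx.Graph()
G.add_node(0, volume=1.0, coords=[0.0, 0.0, 0.0])
G.add_node(1, volume=2.0, coords=[1.0, 0.0, 0.0])
G.add_node(2, volume=1.5, coords=[1.0, 1.0, 0.0])
G.add_edge(0, 1, length=1.0)
G.add_edge(1, 2, length=1.0)

# If the labels were not already 0..N-1, remap them first:
G = nx.convert_node_labels_to_integers(G, first_label=0, ordering='sorted')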
Exemplo n.º 39
0
def entry_point():

    parser = OptionParser()

    # input files
    parser.add_option("--bfile", dest='bfile', type=str, default=None)
    parser.add_option("--pfile", dest='pfile', type=str, default=None)
    parser.add_option("--efile", dest='efile', type=str, default=None)
    parser.add_option("--ffile", dest='ffile', type=str, default=None)

    # output file
    parser.add_option("--ofile", dest='ofile', type=str, default=None)

    # phenotype filtering
    parser.add_option("--pheno_id", dest='pheno_id', type=str, default=None)

    # snp filtering options
    parser.add_option("--idx_start", dest='i0', type=int, default=None)
    parser.add_option("--idx_end", dest='i1', type=int, default=None)
    parser.add_option("--chrom", dest='chrom', type=int, default=None)
    parser.add_option("--pos_start", dest='pos_start', type=int, default=None)
    parser.add_option("--pos_end", dest='pos_end', type=int, default=None)

    # size of batches to load into memory
    parser.add_option(
        "--batch_size", dest='batch_size', type=int, default=1000)

    # analysis options
    parser.add_option("--rhos", dest='rhos', type=str, default=None)
    parser.add_option(
        "--unique_variants",
        action="store_true",
        dest='unique_variants',
        default=False)
    parser.add_option(
        "--no_interaction_test",
        action="store_true",
        dest='no_interaction_test',
        default=False)
    (opt, args) = parser.parse_args()

    # assert stuff
    assert opt.bfile is not None, 'Specify bed file!'
    assert opt.pfile is not None, 'Specify pheno file!'
    assert opt.efile is not None, 'Specify env file!'
    assert opt.ofile is not None, 'Specify out file!'
    if opt.rhos is None: opt.rhos = '0.,.2,.4,.6,.8,1.'

    # import geno and subset
    reader = BedReader(opt.bfile)
    query = build_geno_query(
        idx_start=opt.i0,
        idx_end=opt.i1,
        chrom=opt.chrom,
        pos_start=opt.pos_start,
        pos_end=opt.pos_end)
    reader.subset_snps(query, inplace=True)

    # pheno
    y = import_one_pheno_from_csv(
        opt.pfile, pheno_id=opt.pheno_id, standardize=True)

    # import environment
    E = sp.loadtxt(opt.efile)

    # import fixed effects
    if opt.ffile is None:
        covs = sp.ones((E.shape[0], 1))
    else:
        covs = sp.loadtxt(opt.ffile)

    # extract rhos
    rhos = sp.array(opt.rhos.split(','), dtype=float)

    # run analysis
    res = run_struct_lmm(
        reader,
        y,
        E,
        covs=covs,
        rhos=rhos,
        batch_size=opt.batch_size,
        no_interaction_test=opt.no_interaction_test,
        unique_variants=opt.unique_variants)

    # export
    print('Export to %s' % opt.ofile)
    make_out_dir(opt.ofile)
    res.to_csv(opt.ofile, index=False)
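
# Small hedged sketch (names illustrative, not part of the CLI above) of how
# the --rhos string and the default covariate matrix are typically prepared:
# a comma-separated list becomes a float array, and an intercept-only design
# is used when no fixed-effects file is given.
import numpy as np

def parse_rhos(rhos_string="0.,.2,.4,.6,.8,1."):
    return np.array(rhos_string.split(','), dtype=float)

def default_covs(n_samples):
    return np.ones((n_samples, 1))  # intercept-only design

# e.g. parse_rhos() -> array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])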
Exemplo n.º 40
0
 def Kdiag(self, theta, x1):
     sigma = SP.exp(2 * theta)
     return sigma * SP.ones(x1.shape[0])
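
# Quick illustrative check (not part of the original class): this kernel is
# homoscedastic noise, K = exp(2*theta) * I, so Kdiag should equal the
# diagonal of the full matrix.
import numpy as np

theta = np.array([0.3])
x1 = np.zeros((5, 1))                    # only the number of rows matters here
sigma = np.exp(2 * theta)
K_full = sigma * np.eye(x1.shape[0])
assert np.allclose(np.diag(K_full), sigma * np.ones(x1.shape[0]))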
Exemplo n.º 41
0
def fit_starcolumn(size, savepng):
    import pylab, scipy

    boxes = []
    coords = []
    for increment in [0, 0.03]:  # ,0.075,0.1]: #1,0.125,0.15,0.175]:
        #print size
        a, b, varp = pylab.hist(size,
                                bins=scipy.arange(0 + increment, 2 + increment,
                                                  0.06))
        #print a, b
        boxes += list(a)
        coords += list(b[:-1] + scipy.ones(len(b[:-1])) * (0.03))

    tot = scipy.array(boxes).sum()
    print tot
    all = zip(coords, boxes)
    all.sort(sortit_rev)
    print all

    sum = 0
    max = 0
    min = 1000000
    foundCenter = False
    from copy import copy
    print all, 'all'
    for x, y in all:
        print x, y, sum, tot
        sum += y
        if float(sum) / tot > 0.05:
            if y > max and not foundCenter:
                max = copy(y)
                max_x = copy(x)
                print 'max', max
            if y / max < 0.98 and not foundCenter:
                center = copy(max_x)
                print center, 'center'
                foundCenter = True
            if foundCenter:
                print 'min', min, y
                if min > y:
                    min = copy(y)
                    min_x = copy(x)
                print y, min
                if y / float(min) > 1.05:
                    right = copy(min_x)
                    break

    left = center - 1. * abs(right - center)
    print center, right, 'center, right'

    print len(boxes), len(coords)
    pylab.clf()
    pylab.scatter(coords, boxes)
    pylab.xlim(0, 2.5)
    pylab.xlabel('SIZE (arcsec)')
    pylab.axvline(x=center, ymin=-10, ymax=10)
    pylab.axvline(x=left, ymin=-10, ymax=10)
    pylab.axvline(x=right, ymin=-10, ymax=10)
    pylab.savefig(savepng)
    pylab.clf()

    return left, right
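
# Hedged numpy-only sketch (not from the original pipeline) of the
# shifted-histogram trick used above: the same data are binned several times
# with offset bin edges and the counts are pooled, giving a finer effective
# sampling of the distribution than a single histogram.
import numpy as np

def shifted_histogram(values, start, stop, width, offsets):
    boxes, coords = [], []
    for off in offsets:
        edges = np.arange(start + off, stop + off, width)
        counts, edges = np.histogram(values, bins=edges)
        boxes.extend(counts)
        coords.extend(edges[:-1] + width / 2.0)   # bin centers
    return np.array(coords), np.array(boxes)

# e.g. shifted_histogram(sizes, 0.0, 2.0, 0.06, offsets=[0.0, 0.03])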
Exemplo n.º 42
0
def vl_phow(im,
            verbose=False,
            fast=True,
            sizes=[4, 6, 8, 10],
            step=2,
            color='rgb',
            floatdescriptors=False,
            magnif=6,
            windowsize=1.5,
            contrastthreshold=0.005):

    opts = Options(verbose, fast, sizes, step, color, floatdescriptors,
                   magnif, windowsize, contrastthreshold)
    dsiftOpts = DSiftOptions(opts)

    # make sure image is float, otherwise segfault
    im = array(im, 'float32')

    # Extract the features
    imageSize = shape(im)
    if im.ndim == 3:
        if imageSize[2] != 3:
            # "IndexError: tuple index out of range" if both if's are checked at the same time
            raise ValueError("Image data in unknown format/shape")
    if opts.color == 'gray':
        numChannels = 1
        if (im.ndim == 2):
            im = vl_rgb2gray(im)
    else:
        numChannels = 3
        if (im.ndim == 2):
            im = dstack([im, im, im])
        if opts.color == 'rgb':
            pass
        elif opts.color == 'opponent':
            # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
            # Note that the mean differs from the standard definition of opponent
            # space and is the regular intensity (for compatibility with
            # the contrast thresholding).
            # Note also that the mean is added back to the other two
            # components with a small multiplier for monochromatic
            # regions.

            mu = 0.3 * im[:, :, 0] + 0.59 * im[:, :, 1] + 0.11 * im[:, :, 2]
            alpha = 0.01
            im = dstack([mu,
                         (im[:, :, 0] - im[:, :, 1]) / sqrt(2) + alpha * mu,
                         (im[:, :, 0] + im[:, :, 1] - 2 * im[:, :, 2]) / sqrt(6) + alpha * mu])
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
    if opts.verbose:
        print('{0}: color space: {1}'.format('vl_phow', opts.color))
        print('{0}: image size: {1} x {2}'.format('vl_phow', imageSize[0], imageSize[1]))
        print('{0}: sizes: [{1}]'.format('vl_phow', opts.sizes))

    frames_all = []
    descrs_all = []
    for size_of_spatial_bins in opts.sizes:
        # from https://github.com/vlfeat/vlfeat/blob/master/toolbox/sift/vl_phow.m
        # Recall from VL_DSIFT() that the first descriptor for scale SIZE has
        # center located at XC = XMIN + 3/2 SIZE (the Y coordinate is
        # similar). It is convenient to align the descriptors at different
        # scales so that they have the same geometric centers. For the
        # maximum size we pick XMIN = 1 and we get centers starting from
        # XC = 1 + 3/2 MAX(OPTS.SIZES). For any other scale we pick XMIN so
        # that XMIN + 3/2 SIZE = 1 + 3/2 MAX(OPTS.SIZES).
        # In practice, the offset must be integer ('bounds'), so the
        # alignment works properly only if all OPTS.SIZES are even or odd.

        off = floor(3.0 / 2 * (max(opts.sizes) - size_of_spatial_bins)) + 1

        # smooth the image to the appropriate scale based on the size
        # of the SIFT bins
        sigma = size_of_spatial_bins / float(opts.magnif)
        ims = vl_imsmooth(im, sigma)

        # extract dense SIFT features from all channels
        frames = []
        descrs = []
        for k in range(numChannels):
            size_of_spatial_bins = int(size_of_spatial_bins)
            # vl_dsift does not accept numpy.int64 or similar
            f_temp, d_temp = vl_dsift(image=ims[:, :, k],
                                      step=dsiftOpts.step,
                                      size=size_of_spatial_bins,
                                      fast=dsiftOpts.fast,
                                      verbose=dsiftOpts.verbose,
                                      norm=dsiftOpts.norm,)
            frames.append(f_temp.T)
            descrs.append(d_temp.T)
        frames = array(frames)
        descrs = array(descrs)
        d_new_shape = [descrs.shape[0] * descrs.shape[1], descrs.shape[2]]
        descrs = descrs.reshape(d_new_shape)
        # remove low contrast descriptors
        # note that for color descriptors the V component is
        # thresholded
        if (opts.color == 'gray') | (opts.color == 'opponent'):
            contrast = frames[0][2, :]
        elif opts.color == 'rgb':
            contrast = mean([frames[0][2, :], frames[1][2, :], frames[2][2, :]], 0)
        else:
            raise ValueError('Color option ' + str(opts.color) + ' not recognized')
        descrs = descrs[:, contrast > opts.contrastthreshold]
        frames = frames[0][:, contrast > opts.contrastthreshold]
        # save only x,y, and the scale
        frames_temp = array(frames[0:3, :])
        padding = array(size_of_spatial_bins * ones(frames[0].shape))
        frames_to_add = vstack([frames_temp, padding])
        # print("Shape of frame for each window", frames_to_add.shape)
        # print("Shape of descriptors for each window", descrs.shape)
        # print("Sample Frame", frames_to_add[:,:1])
        frames_all.append(vstack([frames_temp, padding]))
        descrs_all.append(array(descrs))


    frames_all = hstack(frames_all)
    # print("length of descriptors ", len(descrs_all))
    descrs_all = hstack(descrs_all)
    # print("Frames Shape", frames_all.shape)
    # print("Descriptors shape", descrs_all.shape)
    # print(np.unique(descrs_all, return_counts=True))
    return frames_all.T[:,:2], descrs_all.T
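
# Self-contained sketch (assumption: mirrors the opponent transform described
# in the comments above, not an official vlfeat API) applying the intensity /
# red-green / yellow-blue decomposition to an RGB image.
import numpy as np

def opponent_transform(im, alpha=0.01):
    r, g, b = im[:, :, 0], im[:, :, 1], im[:, :, 2]
    mu = 0.3 * r + 0.59 * g + 0.11 * b
    o1 = (r - g) / np.sqrt(2) + alpha * mu
    o2 = (r + g - 2 * b) / np.sqrt(6) + alpha * mu
    return np.dstack([mu, o1, o2])

# e.g. opponent_transform(np.random.rand(32, 32, 3)).shape -> (32, 32, 3)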
Exemplo n.º 43
0
            self._allEvaluations = []
            
        tmp = [self._sample2base(self._produceSample()) for _ in range(self.batchSize)]
        list(map(self._oneEvaluation, tmp))            
        self._pointers = list(range(len(self._allEvaluated) - self.batchSize, len(self._allEvaluated)))                    
            
    def _learnStep(self):
        # produce samples
        self._produceSamples()
        samples = list(map(self._base2sample, self._population)) 
        
        #compute utilities
        utilities = self.shapingFunction(self._currentEvaluations)
        utilities /= sum(utilities)  # make the utilities sum to 1
        if self.uniformBaseline:
            utilities -= 1. / self.batchSize                           
                    
        # update center
        dCenter = dot(utilities, samples)
        self._center += self.centerLearningRate * self._sigmas * dCenter
        
        # update variances
        covGradient = dot(utilities, [s ** 2 - 1 for s in samples])        
        dA = 0.5 * self.covLearningRate * covGradient                                
        self._sigmas = self._sigmas * exp(dA)            
        
        
if __name__ == "__main__":
    from pybrain.rl.environments.functions.unimodal import ElliFunction
    print((SNES(ElliFunction(100), ones(100), verbose=True).learn()))
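
# A minimal, self-contained sketch (assumption: not part of the original
# project) of the separable NES update used in _learnStep above, written for
# minimization: rank-based utilities, a center step scaled by the per-dimension
# sigmas, and a multiplicative (exponential) update of the sigmas themselves.
import numpy as np

def snes_sketch(f, x0, iterations=200, batch=20, lr_center=1.0, lr_cov=0.1, seed=0):
    rng = np.random.default_rng(seed)
    center = np.array(x0, dtype=float)
    sigmas = np.ones_like(center)
    for _ in range(iterations):
        samples = rng.standard_normal((batch, center.size))  # draws in "sample" space
        points = center + sigmas * samples                   # map to base space
        fitness = np.array([f(p) for p in points])
        order = fitness.argsort()                            # best (smallest) first
        utilities = np.zeros(batch)
        utilities[order] = np.linspace(1.0, 0.0, batch)      # simple linear shaping
        utilities /= utilities.sum()                         # make utilities sum to 1
        utilities -= 1.0 / batch                             # uniform baseline
        center += lr_center * sigmas * utilities.dot(samples)
        cov_gradient = utilities.dot(samples ** 2 - 1.0)
        sigmas = sigmas * np.exp(0.5 * lr_cov * cov_gradient)
    return center

# e.g. snes_sketch(lambda x: float((x ** 2).sum()), np.ones(10)) drives the
# center toward the origin.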
    
Exemplo n.º 44
0
def fit(colors, c1, c2, m, savepng):
    import pylab, scipy
    ''' essentially fine resolution binning '''
    boxes = []
    coords = []
    for increment in [0, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175]:
        a, b, varp = pylab.hist(colors,
                                bins=scipy.arange(-4 + increment,
                                                  4 + increment, 0.2))
        #print a, b
        boxes += list(a)
        coords += list(b[:-1] + scipy.ones(len(b[:-1])) * (0.1))

    print len(colors), colors, 'len'

    tot = scipy.array(boxes).sum()
    print tot

    solutions = []
    for version in ['reverse']:  #:,'forward']:

        left = -99
        center = -99

        all = zip(coords, boxes)
        if version == 'reverse':
            all.sort(sortit)
        if version == 'forward':
            all.sort(sortit_rev)

        print all
        pylab.clf()
        pylab.scatter(coords, boxes)
        #pylab.show()
        print 'plotted'

        sum = 0
        max_y = 0
        min = 1000000
        foundCenter = False
        from copy import copy
        print all, 'all'

        rev = zip(all[:][1], all[:][0])

        a = zip(boxes, coords)
        a.sort()
        peak = a[-1][1]

        foundCenter = False

        for x, y in all:
            print x, y, sum, tot

            print max_y, min, foundCenter, peak

            sum += y
            #print all[-1][0], all[0][0]

            if sum > 0:
                if float(
                        tot
                ) / sum > 0.05 and y > 100:  #True: # (all[-1][0] < all[0][0] and x < peak ) or (all[-1][0] > all[0][0] and x > peak ): #
                    if y > max_y and not foundCenter:
                        max_y = copy(y)
                        max_x = copy(x)
                        print 'max', max_y
                    print y / max_y, (max_y - y)
                    if y / max_y < 0.98 and (max_y -
                                             y) > 15 and not foundCenter:
                        center = copy(max_x)
                        print center, 'center', max_y
                        foundCenter = True
                #center = peak
                if foundCenter:
                    print 'min', min, y
                    if min > y:
                        min = copy(y)
                        min_x = copy(x)
                    print y, min, x
                    if y / float(min) > 1.04:
                        left = copy(min_x)
                        print peak, left, center, 'FOUND ONE'
                        break

        if left != -99:
            if left > center:
                left = center - max(0.05, abs(center - left))
            right = center + max(0.4, 1. * abs(left - center))
            print center, left, right, peak
            print right - peak, peak - left
            if True:  #right - peak > 0 and peak - left > 0:
                solutions.append([center, left, right])
    ''' pick out the narrower solution '''
    if len(solutions) > 1:
        if solutions[0][0] - solutions[0][1] < solutions[1][0] - solutions[1][
                1]:
            solution = solutions[0]
        else:
            solution = solutions[1]
    else:
        solution = solutions[0]

    center, left, right = solution

    print center, left, right
    print len(boxes), len(coords)

    #print boxes, coords
    pylab.clf()
    pylab.scatter(coords, boxes)
    pylab.xlabel(c1 + ' - ' + c2)
    pylab.axvline(x=center, ymin=-10, ymax=10)
    pylab.axvline(x=left, ymin=-10, ymax=10)
    pylab.axvline(x=right, ymin=-10, ymax=10)
    pylab.savefig(savepng)

    return left, right
Exemplo n.º 45
0
    xi = xi(r)

    def f_xiSB(r, am3, am2, am1, a0, a1):
        par = [am3, am2, am1, a0, a1]
        model = sp.zeros((len(par), r.size))
        tw = r != 0.
        model[0, tw] = par[0] / r[tw]**3
        model[1, tw] = par[1] / r[tw]**2
        model[2, tw] = par[2] / r[tw]**1
        model[3, tw] = par[3]
        model[4, :] = par[4] * r
        model = sp.array(model)
        return model.sum(axis=0)

    w = ((r >= sb1_rmin) & (r < sb1_rmax)) | ((r >= sb2_rmin) & (r < sb2_rmax))
    sigma = 0.1 * sp.ones(xi.size)
    sigma[(r >= sb1_rmin - 2.) & (r < sb1_rmin + 2.)] = 1.e-6
    sigma[(r >= sb2_rmax - 2.) & (r < sb2_rmax + 2.)] = 1.e-6
    popt, pcov = curve_fit(f_xiSB, r[w], xi[w], sigma=sigma[w])

    model = f_xiSB(r, *popt)
    xiSB = xi.copy()
    ww = (r >= sb1_rmin) & (r < sb2_rmax)
    xiSB[ww] = model[ww]

    pkSB = nbodykit.cosmology.correlation.xi_to_pk(r, xiSB, extrap=True)
    pkSB = pkSB(k)
    pkSB *= pk[-1] / pkSB[-1]

    out = fitsio.FITS(args.out, 'rw', clobber=True)
    head = [{'name': k, 'value': float(v)} for k, v in cat.items()]
Exemplo n.º 46
0
def run():
    from optparse import OptionParser

    usage = "usage: python redsequence [options] \n\nIdentifies and fits the red sequence using apparent magnitude and one color.\nOptionally identifies the star column and uses only larger objects.\n"
    parser = OptionParser(usage)

    parser.add_option("-c",
                      "--cluster",
                      help="name of cluster (i.e. MACS0717+37)")
    parser.add_option("-d",
                      "--detectband",
                      help="detection band (i.e. W-J-V)",
                      default='W-J-V')
    parser.add_option(
        "--c1",
        help=
        "name of first filter in 'galaxy color' (i.e. MAG_APER1-SUBARU-COADD-1-W-J-V)",
        default='MAG_APER1-SUBARU-COADD-1-W-J-V')
    parser.add_option(
        "--c2",
        help=
        "name of second filter in 'galaxy color' (i.e. MAG_APER1-SUBARU-COADD-1-W-C-RC)",
        default='MAG_APER1-SUBARU-COADD-1-W-C-RC')
    parser.add_option(
        "-m",
        '--m',
        help=
        "name of filter to be used as 'galaxy magnitude' (default is '--c2')",
        default=None)
    parser.add_option("-s",
                      "--starcolumn",
                      help="add to filter out star column",
                      action="store_true",
                      default=False)
    parser.add_option('--lm',
                      help="limiting magnitude applied to 'galaxy magnitude'",
                      default=False)
    parser.add_option(
        '-r',
        "--center_radius",
        help=
        "maximum galaxy radius from cluster center (in arcsec) (default=440)",
        default=660.)
    parser.add_option("-l",
                      "--location",
                      help="write output directory",
                      default=None)
    parser.add_option("-w",
                      "--web",
                      help="instead write to web (Pat's space)",
                      action="store_true",
                      default=False)
    parser.add_option(
        "-z",
        "--z",
        help=
        "see what the photometric redshifts are of redsequence galaxies (requires redshift catalog, obviously)",
        action='store_true',
        default=False)
    parser.add_option(
        "--cat",
        help=
        "name of alternate input catalog (if you don't want to use the default photometry catalog)",
        default=None)
    parser.add_option("--existingcolor",
                      help="use existing colors of red sequence fit",
                      action="store_true",
                      default=False)
    parser.add_option("-e",
                      "--existing",
                      help="use existing red sequence fit",
                      action="store_true",
                      default=False)

    (options, args) = parser.parse_args()

    if options.m is None:
        options.m = options.c2

    if options.location is not None and options.web:
        print 'Either specify location or web but not both at once'
        raise Exception

    if options.location is None and options.web is False:
        options.location = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + options.cluster + '/PHOTOMETRY_' + options.detectband + '_iso/'
    elif options.web:
        options.location = '/nfs/slac/g/ki/ki04/pkelly/photoz/' + options.cluster + '/CWWSB_capak.list/'

    if options.location[-1] != '/':
        options.location = options.location + '/'
    print options.location
    import os

    if options.existingcolor or options.existing:
        dir = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + options.cluster + '/LENSING_' + options.detectband + '_' + options.detectband + '_aper/good/'
        dict = {}
        print 'file', dir + 'redseqfit_2.orig'
        redseqfit = open(dir + 'redseqfit_2.orig', 'r').readlines()
        slope = float(redseqfit[1].split('=')[1].split('*')[0])
        intercept = float(redseqfit[1][:-1].split('+')[1])

        upper_intercept = float(redseqfit[3][:-1].split('+')[1])
        lower_intercept = float(redseqfit[4][:-1].split('+')[1])

        polycoeffs = [slope, intercept]
        std = (upper_intercept - intercept) / 1.2

        info = open(dir + 'redseq_all.params', 'r').readlines()
        print info, dir + 'redseq_all.params'
        for l in info:
            if len(l.split(':')) > 1:
                key, value = l[:-1].split(': ')
                dict[key] = value

        print dict

        #options.center_radius = dict['radcut']

        def prefix(filt):
            if filt in ('g', 'r', 'u'):
                return 'MAG_APER1-MEGAPRIME-COADD-1-' + filt
            else:
                return 'MAG_APER1-SUBARU-COADD-1-' + filt

        dict['slope'] = slope
        dict['intercept'] = intercept
        dict['lower_intercept'] = lower_intercept
        dict['upper_intercept'] = upper_intercept

        if options.existing:
            options.m = prefix(dict['xmag'])
            options.c1 = prefix(dict['greenmag'])
            options.c2 = prefix(dict['redmag'])
            options.lm = dict['magcut2']
            print 'finished'
        elif options.existingcolor:
            options.c1 = prefix(dict['greenmag'])
            options.c2 = prefix(dict['redmag'])

    cluster = options.cluster
    c1 = options.c1
    c2 = options.c2
    m = options.m

    if options.z:
        import pyfits
        cat = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.APER1.1.CWWSB_capak.list.all.bpz.tab'
        p = pyfits.open(cat)
        photoz = p['STDTAB'].data
        zero_IDs = len(photoz[photoz.field('SeqNr') == 0])
        if zero_IDs > 0:
            print 'Wrong photoz catalog?', cat
            print str(zero_IDs) + ' many SeqNr=0'
            raise Exception

        print cat

    if options.cat is None:  #not hasattr(options,'cat'):
        input_mags = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.slr.alter.cat'
    else:
        input_mags = options.cat

    import pyfits, os, sys, pylab, do_multiple_photoz, commands, re, math, scipy
    from copy import copy
    print 'input magnitude catalog:', input_mags, options.cat, hasattr(
        options, 'cat')

    filterlist = do_multiple_photoz.get_filters(input_mags, 'OBJECTS')
    #print filterlist

    print input_mags
    w = pyfits.open(input_mags)
    mags = w['OBJECTS'].data

    #print mags.field('Xpos')

    mask = mags.field(c1) > -90
    if options.z: photoz = photoz[mask]
    mags = mags[mask]

    mask = mags.field(c2) > -90
    if options.z: photoz = photoz[mask]
    mags = mags[mask]

    mask = mags.field(m) > -90
    if options.z: photoz = photoz[mask]
    mags = mags[mask]

    mask = mags.field('Flag') == 0
    if options.z: photoz_star = photoz[mask]
    mags_star = mags[mask]

    #mask = mags_star.field(c2) < 23
    ''' get cluster redshift '''
    command = 'grep ' + cluster + ' ' + '/nfs/slac/g/ki/ki05/anja/SUBARU/' + '/clusters.redshifts '
    print command
    cluster_info = commands.getoutput(command)
    cluster_redshift = float(re.split('\s+', cluster_info)[1])
    print cluster_redshift

    if options.lm:
        mag_cut = float(options.lm)
    else:
        ''' compute faint magnitude cutoff '''
        if m[-6:] == 'W-C-RC' or m[-1] == 'r':
            mag_cut = 21.5 + 2.5 * math.log10((cluster_redshift / 0.19)**2.)
        if m[-5:] == 'W-J-V' or m[-5:] == 'W-J-B' or m[-1] == 'g':
            mag_cut = 22. + 2.5 * math.log10((cluster_redshift / 0.19)**2.)

    if not options.center_radius:
        ''' compute radial size of cut '''
        options.center_radius = 400 / (cluster_redshift / 0.4)

    options.center_radius = 400

    print mag_cut, options.lm

    if True:  #not options.existing:
        ''' identify star column (optional) '''
        if options.starcolumn:
            savepng = '/nfs/slac/g/ki/ki04/pkelly/photoz/' + cluster + '/seeing.png'
            left, right = fit_starcolumn(
                mags_star[mask].field('FLUX_RADIUS') * 0.2, savepng)

            savepng = options.location + 'column.png'

            pylab.axvline(x=left, ymin=-10, ymax=100)
            pylab.axvline(x=right, ymin=-10, ymax=100)
            pylab.scatter(mags.field('FLUX_RADIUS') * 0.2,
                          mags.field(m),
                          s=0.25)
            pylab.xlim(0, 2.5)
            pylab.xlabel('SIZE (arcsec)')
            pylab.ylabel(m)
            pylab.savefig(savepng)
            pylab.clf()

            mask = mags.field('FLUX_RADIUS') * 0.2 > right
            if options.z: photoz = photoz[mask]
            mags = mags[mask]
        ''' select galaxies near center of field '''
        #options.center_radius=240
        mask = ((mags.field('Xpos') - 5000. * scipy.ones(len(mags)))**2. +
                (mags.field('Ypos') - 5000. * scipy.ones(len(mags)))**
                2.)**0.5 * 0.2 < float(options.center_radius)
        if options.z: photoz = photoz[mask]
        mags = mags[mask]

        print len(mags)
        if options.z: print len(photoz)

        from copy import copy
        mags_mask = copy(mags)
        x = copy(mags.field(m))
        y = copy(mags.field(c1) - mags.field(c2))

        print mags.field(c1), mags.field(c2), c1, c2

        mask = x < mag_cut

        print mag_cut
        #print x, y

        savedir = options.location
        os.system('mkdir -p ' + savedir)

        savepng = options.location + 'redselection.png'

        print options.center_radius, len(y[mask])
        left, right = fit(y[mask], c1, c2, m, savepng)

        if options.z:
            mask = photoz.field('NFILT') > 3
            reg_mags = mags_mask[mask]
            reg_photoz = photoz[mask]
            mask = photoz.field('BPZ_ODDS') > 0.95
            reg_mags = mags_mask[mask]
            reg_photoz = photoz[mask]

            print len(reg_photoz)

            print 'making reg'
            reg = open('all.reg', 'w')
            reg.write(
                'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
            )
            for i in range(len(reg_mags.field('Xpos'))):
                reg.write('circle(' + str(reg_mags.field('Xpos')[i]) + ',' +
                          str(reg_mags.field('Ypos')[i]) + ',' + str(5) +
                          ') # color=red width=2 text={' +
                          str(reg_photoz.field('BPZ_Z_B')[i]) + '}\n')
            reg.close()

            print 'finished reg'

        mask = x < mag_cut
        if options.z:
            photoz2 = photoz[mask]
            mags_mask = mags_mask[mask]
        x2 = x[mask]
        y2 = y[mask]

        #print sorted(x2)
        print savepng

        print left, right

        if not options.existing:
            mask = y2 > left
            if options.z:
                photoz2 = photoz2[mask]
                mags_mask = mags_mask[mask]
            x2 = x2[mask]
            y2 = y2[mask]

            mask = y2 < right
            if options.z:
                photoz2 = photoz2[mask]
                mags_mask = mags_mask[mask]
            x2 = x2[mask]
            y2 = y2[mask]

        if not options.existing: polycoeffs = scipy.polyfit(x2, y2, 1)
        print polycoeffs

        yfit = scipy.polyval(polycoeffs, x2)

        print x2, yfit
        if not options.existing: std = scipy.std(abs(yfit - y2))
        print std
        mask = abs(yfit - y2) < std * 2.5
        if options.z: photoz3 = photoz2[mask]
        x3 = x2[mask]
        y3 = y2[mask]

        if not options.existing: polycoeffs = scipy.polyfit(x3, y3, 1)

        print polycoeffs
        yfit = scipy.polyval(polycoeffs, sorted(x2))
        print x2, yfit
        if not options.existing: std = scipy.std(abs(yfit - y2))
        print std
        std_fac = 1.2

    mask = abs(yfit - y2) < std * std_fac
    if options.z:
        photoz2 = photoz2[mask]
        mags_mask = mags_mask[mask]
        print photoz2.field('SeqNr')
        print photoz2.field('BPZ_Z_B')

        fred = '/nfs/slac/g/ki/ki05/anja/SUBARU/' + cluster + '/PHOTOMETRY_' + options.detectband + '_aper/' + cluster + '.redseq'

        f = open(fred, 'w')
        for id in photoz2.field('SeqNr'):
            f.write(str(id) + '\n')
        f.close()

        reg = open('regseq.reg', 'w')
        reg.write(
            'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
        )
        for i in range(len(mags_mask.field('Xpos'))):
            reg.write('circle(' + str(mags_mask.field('Xpos')[i]) + ',' +
                      str(mags_mask.field('Ypos')[i]) + ',' + str(5) +
                      ') # color=green width=2 text={' +
                      str(photoz2.field('BPZ_Z_B')[i]) + '}\n')
        reg.close()

    pylab.clf()

    savepng = options.location + 'redhistogram.png'
    savepdf = options.location + 'redhistogram.pdf'

    if options.z:
        lower_lim = cluster_redshift - 0.3
        if lower_lim < 0: lower_lim = 0.0001
        print photoz2.field('BPZ_Z_B')
        a, b, varp = pylab.hist(photoz2.field('BPZ_Z_B'),
                                bins=scipy.arange(lower_lim,
                                                  cluster_redshift + 0.3,
                                                  0.01),
                                color='red')
        pylab.axvline(x=cluster_redshift,
                      ymin=0,
                      ymax=100,
                      color='blue',
                      linewidth=3)
        pylab.xlabel('Redshift')
        pylab.ylabel('Galaxies')
        pylab.savefig(savepng)
        pylab.savefig(savepdf)

        reg = open('reg.reg', 'w')
        reg.write(
            'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
        )
        for i in range(len(mags_mask.field('Xpos'))):
            reg.write('circle(' + str(mags_mask.field('Xpos')[i]) + ',' +
                      str(mags_mask.field('Ypos')[i]) + ',' + str(5) +
                      ') # color=blue width=2 text={' +
                      str(photoz2.field('BPZ_Z_B')[i]) + '}\n')
        reg.close()

    pylab.clf()
    pylab.plot(sorted(x2), yfit, 'b-')
    pylab.plot(sorted(x2), yfit + scipy.ones(len(yfit)) * std * std_fac, 'b-')
    pylab.plot(sorted(x2), yfit - scipy.ones(len(yfit)) * std * std_fac, 'b-')
    pylab.scatter(x, y, color='red', s=0.5)
    pylab.axhline(y=left, xmin=-10, xmax=100)
    pylab.axvline(x=mag_cut, ymin=-10, ymax=10)
    pylab.axhline(y=right, xmin=-10, xmax=100)
    pylab.xlabel(m)
    pylab.ylabel(c1 + ' - ' + c2)

    if options.z:
        mask = abs(photoz.field('BPZ_Z_B') - cluster_redshift) < 0.04
        mags = mags[mask]
        photoz = photoz[mask]

        mask = photoz.field('NFILT') > 4
        mags = mags[mask]
        photoz = photoz[mask]

        print 'priormag'
        print photoz.field('priormag')
        print 'nfilt'
        print photoz.field('NFILT')

        import pylab
        x = mags.field(m)
        y = mags.field(c1) - mags.field(c2)
        pylab.scatter(x, y, s=0.5)

        reg = open('reg.reg', 'w')
        reg.write(
            'global color=green font="helvetica 10 normal" select=1 highlite=1 edit=1 move=1 delete=1 include=1 fixed=0 source\nphysical\n'
        )
        for i in range(len(mags.field('Xpos'))):
            reg.write('circle(' + str(mags.field('Xpos')[i]) + ',' +
                      str(mags.field('Ypos')[i]) + ',' + str(5) +
                      ') # color=red width=2 text={' +
                      str(photoz.field('BPZ_Z_B')[i]) + '}\n')
        reg.close()

    pylab.xlim(sorted(x)[0], sorted(x)[-2])
    span = (sorted(y)[-2] - sorted(y)[2]) / 2
    if span > 1: span = 1
    median = scipy.median(scipy.array(y))
    pylab.ylim(median - 2, median + 2)

    savepng = options.location + 'cmd.png'
    pylab.savefig(savepng)

    pylab.clf()
    pylab.scatter(mags.field('Xpos'), mags.field('Ypos'), s=0.02)
    pylab.xlim([0, 10000])
    pylab.ylim([0, 10000])
    pylab.xlabel('X Pixel')
    pylab.ylabel('Y Pixel')

    savepng = options.location + '/positions.png'
    print savepng
    pylab.savefig(savepng)

    s = "\nBest fit: y = " + str(polycoeffs[0]) + "*x +" + str(
        polycoeffs[1]) + '\n'
    s += "\nCut: y < " + str(
        polycoeffs[0]) + "*x +" + str(polycoeffs[1] + std_fac * std) + '\n'
    s += "Cut: y > " + str(
        polycoeffs[0]) + "*x +" + str(polycoeffs[1] - std_fac * std) + '\n'
    s += "x < " + str(mag_cut) + '\n'
    s += 'x = ' + m + '\n'
    s += 'y = ' + c1 + ' - ' + c2 + '\n'

    print s

    f = open(options.location + '/redseqfit', 'w')
    f.write(s)
    f.close()

    from datetime import datetime
    t2 = datetime.now()

    print options.location
    f = open(options.location + '/redsequence.html', 'w')
    f.write(
        '<html><tr><td>' + t2.strftime("%Y-%m-%d %H:%M:%S") +
        '</td></tr><tr><td><h2>Photometric Redshifts of the Red Sequence</h2></td></tr><tr><td><img src="redhistogram.png"></img></td></tr><tr><td><img src="seeing.png"></img></td></tr><tr><td><img src="column.png"></img></td></tr><tr><td><img src="redselection.png"></img></td></tr><tr><td><img src="cmd.png"></img></td></tr><tr><td><img src="positions.png"></img></td></tr><tr><td>'
        + s.replace('\n', '<br>') + '</td></tr>        </html>')

    print 'Wrote output to:', options.location
    print 'Best fit parameters in:', options.location + '/redseqfit'
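
# Hedged sketch (illustrative, not the original pipeline) of the core red
# sequence fitting loop above: fit color vs. magnitude with a straight line,
# reject points further than 2.5 sigma from the fit, refit on the clipped
# sample, and keep the band at +/- std_fac * sigma around the final relation.
import numpy as np

def fit_red_sequence(mag, color, clip=2.5, std_fac=1.2):
    coeffs = np.polyfit(mag, color, 1)
    resid = np.abs(np.polyval(coeffs, mag) - color)
    std = resid.std()
    keep = resid < clip * std
    coeffs = np.polyfit(mag[keep], color[keep], 1)     # refit on clipped sample
    std = np.abs(np.polyval(coeffs, mag) - color).std()
    return coeffs, std * std_fac                       # (slope, intercept), half-width

# e.g. (slope, intercept), half_width = fit_red_sequence(x2, y2)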
Exemplo n.º 47
0
    def fitLMM(self,
               K=None,
               tech_noise=None,
               idx=None,
               i0=None,
               i1=None,
               verbose=False):
        """
		Args:
			K:				list of random effects to be considered in the analysis;
							if K is None, no random effect is considered
			idx:			indices of the genes to be considered in the analysis
			i0:				gene index from which the analysis starts
			i1:				gene index at which the analysis stops
			verbose:		if True, print progress
		Returns:
			pv:				matrix of pvalues
			beta:			matrix of correlations
			info:			dictionary annotating pv and beta rows and columns, containing
							gene_idx_row:	index of the genes in rows
							conv:			boolean vector marking genes for which variance decomposition has converged
							gene_row:		annotates rows of matrices
		"""
        assert self.var is not None, 'scLVM:: when multiple hidden factors are considered, varianceDecomposition must be run prior to this method'
        #		print QTL

        if idx is None:
            if i0 is None or i1 is None:
                i0 = 0
                i1 = self.G
            idx = SP.arange(i0, i1)
        elif not isinstance(idx, SP.ndarray):
            idx = SP.array([idx])

        if K is not None and not isinstance(K, list):
            K = [K]

        lmm_params = {
            'covs': SP.ones([self.N, 1]),
            'NumIntervalsDeltaAlt': 100,
            'NumIntervalsDelta0': 100,
            'searchDelta': True
        }

        Ystd = self.Y - self.Y.mean(0)
        Ystd /= self.Y.std(0)

        beta = SP.zeros((idx.shape[0], self.G))
        pv = SP.zeros((idx.shape[0], self.G))
        geneID = SP.zeros(idx.shape[0], dtype=str)
        count = 0
        var = self.var / self.var.sum(1)[:, SP.newaxis]
        for ids in idx:
            if verbose:
                print('.. fitting gene %d' % ids)
            # extract a single gene
            if K is not None:
                if len(K) > 1:
                    if self.var_info['conv'][count] == True:
                        _K = SP.sum(
                            [var[count, i] * K[i] for i in range(len(K))], 0)
                        _K /= _K.diagonal().mean()
                    else:
                        _K = None
                else:
                    _K = K[0]
            else:
                _K = None
            lm = QTL.test_lmm(Ystd,
                              Ystd[:, ids:ids + 1],
                              K=_K,
                              verbose=False,
                              **lmm_params)
            pv[count, :] = lm.getPv()[0, :]
            beta[count, :] = lm.getBetaSNP()[0, :]
            if self.geneID is not None: geneID[count] = self.geneID[ids]
            count += 1

        info = {'conv': self.var_info['conv'], 'gene_idx_row': idx}
        if geneID is not None: info['gene_row'] = geneID

        return pv, beta, info
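
# Minimal sketch (not from scLVM itself) of the kernel-combination step used
# above: weight each random-effect kernel by its estimated variance fraction,
# sum, and rescale so the combined kernel has unit mean diagonal.
import numpy as np

def combine_kernels(kernels, var_fractions):
    K = sum(w * Ki for w, Ki in zip(var_fractions, kernels))
    return K / K.diagonal().mean()

# e.g. with two kernels and fractions that sum to 1:
# K1, K2 = np.eye(50), np.ones((50, 50)) / 50
# K = combine_kernels([K1, K2], [0.7, 0.3])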
Exemplo n.º 48
0
    def __init__(self, config):
        """A model of the spectrometer instrument, including spectral 
        response and noise covariance matrices. Noise is typically calculated
        from a parametric model, fit for the specific instrument.  It is a 
        function of the radiance level."""

        # If needed, skip first index column and/or convert to nanometers
        self.wavelength_file = config['wavelength_file']

        q = s.loadtxt(self.wavelength_file)
        if q.shape[1] > 2:
            q = q[:, 1:]
        if q[0, 0] < 100:
            q = q * 1000.0
        self.nchans = q.shape[0]
        self.wl = q[:, 0]
        self.fwhm = q[:, 1]
        self.bounds, self.scale, self.statevec = [], [], []

        # noise specified as parametric model.
        if 'SNR' in config:

            self.model_type = 'SNR'
            self.snr = float(config['SNR'])

        else:

            self.noise_file = config['noise_file']

            if self.noise_file.endswith('.txt'):

                # parametric version
                self.model_type = 'parametric'
                coeffs = s.loadtxt(self.noise_file,
                                   delimiter=' ',
                                   comments='#')
                p_a = interp1d(coeffs[:, 0],
                               coeffs[:, 1],
                               fill_value='extrapolate')
                p_b = interp1d(coeffs[:, 0],
                               coeffs[:, 2],
                               fill_value='extrapolate')
                p_c = interp1d(coeffs[:, 0],
                               coeffs[:, 3],
                               fill_value='extrapolate')
                self.noise = s.array([[p_a(w), p_b(w), p_c(w)]
                                      for w in self.wl])

            elif self.noise_file.endswith('.mat'):

                self.model_type = 'pushbroom'
                D = loadmat(self.noise_file)
                nb = len(self.wl)
                self.ncols = D['columns'][0, 0]
                if nb != s.sqrt(D['bands'][0, 0]):
                    raise ValueError(
                        'Noise model does not match wavelength # bands')
                cshape = ((self.ncols, nb, nb))
                self.covs = D['covariances'].reshape(cshape)

        self.integrations = config['integrations']

        # Variables not retrieved
        self.bvec = ['Cal_Relative_%04i' % int(w) for w in self.wl]

        if 'unknowns' in config:

            bval = []
            for key, val in config['unknowns'].items():
                if type(val) is str:
                    u = s.loadtxt(val, comments='#')
                    if (len(u.shape) > 0 and u.shape[1] > 1):
                        u = u[:, 1]
                else:
                    u = s.ones(len(self.wl)) * val
                bval.append(u)

            # unretrieved uncertainties combine via Root Sum Square...
            self.bval = s.sqrt(pow(s.array(bval), 2).sum(axis=0))

        else:
            # no unknowns - measurement noise only
            self.bval = s.zeros(len(self.wl))
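
# Illustrative sketch (not part of the instrument class above) of the
# root-sum-square combination applied to the unretrieved uncertainties: each
# per-channel uncertainty vector is squared, the squares are summed across
# sources, and the square root is taken channel by channel.
import numpy as np

def combine_unknowns(uncertainty_vectors):
    stacked = np.array(uncertainty_vectors)      # shape (n_sources, n_channels)
    return np.sqrt((stacked ** 2).sum(axis=0))

# e.g. combine_unknowns([0.01 * np.ones(4), 0.02 * np.ones(4)])
# -> array of ~0.0224 per channel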
Exemplo n.º 49
0
    def hessian(self,
                params,
                epsf,
                relativeScale=True,
                stepSizeCutoff=None,
                jacobian=None,
                verbose=False):
        """
        Returns the hessian of the model.

        epsf: Sets the stepsize to try
        relativeScale: If True, step i is of size p[i] * eps, otherwise it is
                       eps
        stepSizeCutoff: The minimum stepsize to take
        jacobian: If the jacobian is passed, it will be used to estimate
                  the step size to take.
        verbose: If True, a message will be printed with each hessian element
                calculated
        """

        nOv = len(params)
        if stepSizeCutoff is None:
            stepSizeCutoff = scipy.sqrt(_double_epsilon_)

        params = scipy.asarray(params)
        if relativeScale:
            eps = epsf * abs(params)
        else:
            eps = epsf * scipy.ones(len(params), scipy.float_)

        # Make sure we don't take steps smaller than stepSizeCutoff
        eps = scipy.maximum(eps, stepSizeCutoff)

        if jacobian is not None:
            # Turn off the relative scaling since that would overwrite all this
            relativeScale = False

            jacobian = scipy.asarray(jacobian)
            if len(jacobian.shape) == 0:
                resDict = self.resDict(params)
                new_jacobian = scipy.zeros(len(params), scipy.float_)
                for key, value in resDict.items():
                    new_jacobian += 2.0 * value * scipy.array(jacobian[0][key])
                jacobian = new_jacobian
            elif len(jacobian.shape) == 2:  # Need to sum up the total jacobian
                residuals = scipy.asarray(self.res(params))
                # Changed by rng7. I'm not sure what is meant by "sum up the
                # total jacobian". The following line failed due to shape
                # mismatch. From the context below, it seems that the dot
                # product is appropriate.
                #jacobian = 2.0*residuals*jacobian
                jacobian = 2.0 * scipy.dot(residuals, jacobian)

            # If parameters are independent, then
            #  epsilon should be (sqrt(2)*J[i])^-1
            factor = 1.0 / scipy.sqrt(2)
            for i in range(nOv):
                if jacobian[i] == 0.0:
                    eps[i] = 0.5 * abs(params[i])
                else:
                    # larger than stepSizeCutoff, but not more than
                    #  half of the original parameter value
                    eps[i] = min(
                        max(factor / abs(jacobian[i]), stepSizeCutoff),
                        0.5 * abs(params[i]))

        ## compute cost at f(x)
        f0 = self.cost(params)

        hess = scipy.zeros((nOv, nOv), scipy.float_)

        ## compute all (numParams*(numParams + 1))/2 unique hessian elements
        for i in range(nOv):
            for j in range(i, nOv):
                hess[i][j] = self.hessian_elem(self.cost, f0, params, i, j,
                                               eps[i], eps[j], relativeScale,
                                               stepSizeCutoff, verbose)
                hess[j][i] = hess[i][j]

        return hess
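
# Self-contained sketch (a hypothetical helper, not the class method above) of
# the forward finite-difference estimate behind hessian_elem:
# H[i][j] ~= (f(x+ei+ej) - f(x+ei) - f(x+ej) + f(x)) / (eps_i * eps_j).
import numpy as np

def fd_hessian(cost, params, eps=1e-4):
    params = np.asarray(params, dtype=float)
    n = len(params)
    f0 = cost(params)
    hess = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            ei = np.zeros(n); ei[i] = eps
            ej = np.zeros(n); ej[j] = eps
            hess[i, j] = (cost(params + ei + ej) - cost(params + ei)
                          - cost(params + ej) + f0) / (eps * eps)
            hess[j, i] = hess[i, j]   # fill the symmetric element
    return hess

# e.g. fd_hessian(lambda p: (p ** 2).sum(), np.ones(3)) is close to 2 * I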
Exemplo n.º 50
0
     Kallperm = sp.dot(Kallperm, Kallperm.T)
     Kallperm /= Kallperm.diagonal().mean()
     Kallperm += 1e-4 * sp.eye(Kallperm.shape[0])
     vcperm = VarianceDecomposition(Y)
     vcperm.addFixedEffect()
     vcperm.addRandomEffect(K=Kallperm)
     vcperm.addRandomEffect(is_noise=True)
     vcperm.optimize()
     permlm0 = vcnull.getLML() - vcperm.getLML()
     perm_file.write(
         "\t".join(map(str, [permlm0, permlm1])) + "\n")
 ## get trans PCs
 S_R, U_R = sp.linalg.eigh(Kc)
 F1 = U_R[:, ::-1][:, :10]
 # add an intercept term
 F1 = sp.concatenate([F1, sp.ones((F1.shape[0], 1))], 1)
 test = "lrt"  #specify type of statistical test
 lmm0 = qtl.test_lmm(snps=Msnps,
                     pheno=Y,
                     K=Kallstd,
                     covs=F1,
                     test=test)
 pvalues = lmm0.getPv(
 )  # 1xS vector of p-values (S=X.shape[1])
 betas = lmm0.getBetaSNP(
 )  # 1xS vector of effect sizes (S=X.shape[1])
 ses = lmm0.beta_ste  # 1xS vector of effect size standard errors (S=X.shape[1])
 RV = Mpos
 RV["pvaluesCisPCs"] = pvalues.T
 RV["betasCisPCs"] = betas.T
 RV["sesCisPCs"] = ses.T
Exemplo n.º 51
0
def add_reads_from_bam(blocks, filenames, types, filter=None, var_aware=False, primary_only=False, no_mm=False, unstranded=True, mm_tag='NM', cram_ref=None):
    # blocks coordinates are assumed to be in closed intervals

    #if filter is None:
    #    filter = dict()
    #    filter['intron'] = 20000
    #    filter['exon_len'] = 8
    #    filter['mismatch']= 1

    if not types: 
        print('add_reads_from_bam: nothing to do')
        return

    verbose = False
    pair = False

    pair = ('pair_coverage' in types)
    clipped = False

    if type(blocks).__module__ != 'numpy':
        blocks = sp.array([blocks])

    for b in range(blocks.shape[0]):

        introns_p = None
        introns_m = None

        if verbose and  b % 10 == 0:
            print('\radd_exon_track_from_bam: %i(%i)' % (b, blocks.shape[0]))
        block_len = int(blocks[b].stop - blocks[b].start)

        ## get data from bam
        if 'exon_track' in types:
            (introns_p, introns_m, coverage) = get_all_data(blocks[b], filenames, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) 
        if 'mapped_exon_track' in types:
            (introns_p, introns_m, mapped_coverage) = get_all_data(blocks[b], filenames, spliced=False, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) 
        if 'spliced_exon_track' in types:
            (introns_p, introns_m, spliced_coverage) = get_all_data(blocks[b], filenames, mapped=False, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref) 
        if 'polya_signal_track' in types:
            (introns_p, introns_m, polya_signals) = get_all_data_uncollapsed(blocks[b], filenames, filter=filter, clipped=True, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref)
        if 'end_signal_track' in types:
            (introns_p, introns_m, read_end_signals) = get_all_data_uncollapsed(blocks[b], filenames, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref)

        if 'intron_list' in types or 'intron_track' in types:
            if introns_p is None:
                (introns_p, introns_m, spliced_coverage) = get_all_data(blocks[b], filenames, mapped=False, filter=filter, var_aware=var_aware, primary_only=primary_only, no_mm=no_mm, mm_tag=mm_tag, cram_ref=cram_ref)
        if not introns_p is None:
            introns_p = sort_rows(introns_p)
        if not introns_m is None:
            introns_m = sort_rows(introns_m)

        # add requested data to block
        tracks = sp.zeros((0, block_len))
        intron_list = []
        for ttype in types:
            ## add exon track to block
            ##############################################################################
            if ttype == 'exon_track':
                tracks = sp.r_[tracks, coverage] 
            ## add mapped exon track to block
            ##############################################################################
            elif ttype == 'mapped_exon_track':
                tracks = sp.r_[tracks, mapped_coverage] 
            ## add spliced exon track to block
            ##############################################################################
            elif ttype == 'spliced_exon_track':
                tracks = sp.r_[tracks, spliced_coverage] 
            ## add intron coverage track to block
            ##############################################################################
            elif ttype == 'intron_track':
                intron_coverage = sp.zeros((1, block_len))
                if introns_p.shape[0] > 0:
                    for k in range(introns_p.shape[0]):
                        from_pos = max(0, introns_p[k, 0])
                        to_pos = min(block_len, introns_p[k, 1])
                        intron_coverage[0, from_pos:to_pos] += introns_p[k, 2]
                if introns_m.shape[0] > 0:
                    for k in range(introns_m.shape[0]):
                        from_pos = max(0, introns_m[k, 0])
                        to_pos = min(block_len, introns_m[k, 1])
                        intron_coverage[0, from_pos:to_pos] += introns_m[k, 2]
                tracks = sp.r_[tracks, intron_coverage] 
            ## compute intron list
            ##############################################################################
            elif ttype == 'intron_list':
                if introns_p.shape[0] > 0 or introns_m.shape[0] > 0:

                    ### filter introns for location relative to block
                    ### this is legacy behavior for matlab versions!
                    ### TODO - Think about keeping this? Make it a parameter?
                    k_idx = sp.where((introns_p[:, 0] > blocks[0].start) & (introns_p[:, 1] < blocks[0].stop))[0]
                    introns_p = introns_p[k_idx, :]
                    k_idx = sp.where((introns_m[:, 0] > blocks[0].start) & (introns_m[:, 1] < blocks[0].stop))[0]
                    introns_m = introns_m[k_idx, :]

                    if unstranded:
                        introns = sort_rows(sp.r_[introns_p, introns_m])
                    else:
                        if blocks[0].strand == '-':
                            introns = introns_m
                        else:
                            introns = introns_p
                    
                    if filter is not None and 'mincount' in filter:
                        take_idx = sp.where(introns[:, 2] >= filter['mincount'])[0]
                        if take_idx.shape[0] > 0:
                            intron_list.append(introns[take_idx, :])
                        else:
                            intron_list.append(sp.zeros((0, 3), dtype='uint32'))
                    else:
                        intron_list.append(introns)
                else:
                    intron_list.append(sp.zeros((0, 3), dtype='uint32'))
            ## add polya signal track
            ##############################################################################
            elif ttype == 'polya_signal_track':
                ### get only end positions of reads
                shp = polya_signals.shape
                end_idx = shp[1] - 1 - polya_signals[:, ::-1].argmax(axis = 1)
                polya_signals = scipy.sparse.coo_matrix((sp.ones((shp[0],)), (sp.arange(shp[0]), end_idx)), shape = shp)
                tracks = sp.r_[tracks, polya_signals.sum(axis = 0)]
            ## add end signal track
            ##############################################################################
            elif ttype == 'end_signal_track':
                ### get only end positions of reads
                shp = read_end_signals.shape
                end_idx = shp[1] - 1 - read_end_signals[:, ::-1].argmax(axis = 1)
                read_end_signals = scipy.sparse.coo_matrix((sp.ones((shp[0],)), (sp.arange(shp[0]), end_idx)), shape = shp)
                tracks = sp.r_[tracks, read_end_signals.sum(axis = 0)]
            else: 
                print('ERROR: unknown type of data requested: %s' % ttype, file=sys.stderr)
    
    if len(types) == 1 and types[0] == 'intron_list':
        return intron_list
    elif 'intron_list' in types:
        return (tracks, intron_list)
    else:
        return tracks
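
## --- usage sketch (not part of the original module) -------------------------
## A hedged example of how add_reads_from_bam might be invoked: the BAM path and
## coordinates are hypothetical, a namedtuple stands in for whatever block class
## the module actually uses (anything exposing .start/.stop/.strand should do),
## and the helpers called above (get_all_data, sort_rows, ...) are assumed to be
## importable alongside this function.
# from collections import namedtuple
# Block = namedtuple('Block', ['start', 'stop', 'strand'])
# example_block = Block(start=100000, stop=101000, strand='+')
# tracks, intron_list = add_reads_from_bam(example_block,
#                                          ['sample1.bam'],
#                                          ['exon_track', 'intron_list'],
#                                          filter={'mincount': 3})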
Exemplo n.º 52
0
def ex9(exclude=sc.array([1, 2, 3, 4]),
        plotfilename='ex9.png',
        zoom=False,
        bovyprintargs={}):
    """ex9: solve exercise 9

    Input:
       exclude  - ID numbers to exclude from the analysis
       zoom - zoom in
    Output:
       plot
    History:
       2009-05-27 - Written - Bovy (NYU)
    """
    #Read the data
    data = read_data('data_yerr.dat')
    ndata = len(data)
    nsample = ndata - len(exclude)
    nSs = 1001
    if zoom:
        Srange = [900, 1000]
    else:
        Srange = [0.001, 1500]
    Ss = sc.linspace(Srange[0], Srange[1], nSs)
    chi2s = sc.zeros(nSs)
    for kk in range(nSs):
        #Put the data in the appropriate arrays and matrices
        Y = sc.zeros(nsample)
        A = sc.ones((nsample, 2))
        C = sc.zeros((nsample, nsample))
        yerr = sc.zeros(nsample)
        jj = 0
        for ii in range(ndata):
            if sc.any(exclude == data[ii][0]):
                pass
            else:
                Y[jj] = data[ii][1][1]
                A[jj, 1] = data[ii][1][0]
                C[jj, jj] = Ss[kk]
                yerr[jj] = data[ii][2]  #OMG, such bad code
                jj = jj + 1
        #Now compute the best fit and the uncertainties
        bestfit = sc.dot(linalg.inv(C), Y.T)
        bestfit = sc.dot(A.T, bestfit)
        bestfitvar = sc.dot(linalg.inv(C), A)
        bestfitvar = sc.dot(A.T, bestfitvar)
        bestfitvar = linalg.inv(bestfitvar)
        bestfit = sc.dot(bestfitvar, bestfit)
        chi2s[kk] = chi2(bestfit, A, Y, C)

    #Now plot the solution
    plot.bovy_print(**bovyprintargs)
    #Plot the best fit line
    xrange = Srange
    if zoom:
        yrange = [nsample - 4, nsample]
    else:
        yrange = [nsample - 10, nsample + 8]
    plot.bovy_plot(Ss,
                   chi2s,
                   'k-',
                   xrange=xrange,
                   yrange=yrange,
                   xlabel=r'$S$',
                   ylabel=r'$\chi^2$',
                   zorder=1)
    plot.bovy_plot(sc.array(Srange),
                   sc.array([nsample - 2, nsample - 2]),
                   'k--',
                   zorder=2,
                   overplot=True)
    #plot.bovy_plot(sc.array([sc.median(yerr**2.),sc.median(yerr**2.)]),
    #               sc.array(yrange),color='0.75',overplot=True)
    plot.bovy_plot(sc.array([sc.mean(yerr**2.),
                             sc.mean(yerr**2.)]),
                   sc.array(yrange),
                   color='0.75',
                   overplot=True)
    plot.bovy_end_print(plotfilename)

    return 0
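
## --- usage sketch (not part of the original exercise code) ------------------
## Assuming the data file 'data_yerr.dat' and the bovy_plot helpers used above
## are on the path, the chi^2(S) curve and its zoomed-in version could be
## produced like so:
# ex9(plotfilename='ex9.png')
# ex9(plotfilename='ex9_zoom.png', zoom=True)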
Exemplo n.º 53
0
import scipy as sp
import matplotlib.pylab as pl

SIZE = 200
MAXTIME = 500
TFSF_POS = 50 # Index of electric field (inclusive) from which the total-field region starts
INTERFACE = 100 # E-field index (inclusive) from where the new medium starts
EPSILON_R = 9
MU_R = 1

ez = sp.zeros(SIZE)
hy = sp.zeros(SIZE)
imp0 = 377.0
snapshots = []
epsR = sp.ones(SIZE)
epsR[INTERFACE:] *= EPSILON_R
muR = sp.ones(SIZE)
muR[INTERFACE:] *= MU_R

for t in range(MAXTIME):
    # TODO: Find out why exactly there is a subtle difference in the incremental
    # electric and magnetic fields. Solution-wise, there is no noticeable
    # difference.
    ezinc = sp.exp(-(t+0.5-(-0.5)-30) * (t+0.5-(-0.5)-30) / 100.0)
    hyinc = sp.exp(-(t-30) * (t-30) / 100.0) / imp0
    
    # TODO: Find out why the ABCs must be given *before* the corresponding 
    # update equation. I'd have thought that it should be done *after*.
    hy[-1] = hy[-2]
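
    ## -----------------------------------------------------------------------
    ## NOTE: the original snippet is cut off here. What follows is a hedged
    ## completion of the standard 1-D FDTD update with a TFSF boundary and a
    ## dielectric half-space, written against the constants defined above; it
    ## is a sketch of the usual scheme, not necessarily the author's exact code.
    ## -----------------------------------------------------------------------
    # update the magnetic field (the right-edge ABC for hy was applied above)
    for m in range(SIZE - 1):
        hy[m] += (ez[m + 1] - ez[m]) / (imp0 * muR[m])
    # TFSF correction: remove the incident field from hy just left of the TF region
    hy[TFSF_POS - 1] -= hyinc

    # simple ABC for ez on the left edge, then update the electric field
    ez[0] = ez[1]
    for m in range(1, SIZE):
        ez[m] += (hy[m] - hy[m - 1]) * imp0 / epsR[m]
    # TFSF correction: add the incident field to ez at the start of the TF region
    ez[TFSF_POS] += ezinc

    # keep an occasional snapshot of the electric field for plotting
    if t % 10 == 0:
        snapshots.append(ez.copy())

# quick look at the stored snapshots (pl is matplotlib.pylab, imported above)
for snap in snapshots[::5]:
    pl.plot(snap)
pl.show()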
    
Exemplo n.º 54
0
def get_reads(fname, chr_name, start, stop, strand=None, filter=None, mapped=True, spliced=True, var_aware=None, collapse=False, primary_only=False, no_mm=False, mm_tag='NM', cram_ref=None):
    
    if not re.search(r'.[bB][aA][mM]$', fname) is None:
        infile = pysam.AlignmentFile(fname, 'rb')
    elif not re.search(r'.[cC][rR][aA][mM]$', fname) is None:
        infile = pysam.AlignmentFile(fname, 'rc', reference_filename=cram_ref, ignore_truncation=True)
    else:
        sys.stderr.write('Error: Unknown input alignment format for: %s\n' % fname)
        sys.exit(1)  # bail out; `infile` would be undefined below otherwise

    ### vectors to build sparse matrix
    i = []
    j = []

    read_cnt = 0
    introns_p = dict()
    introns_m = dict()

    if collapse:
        read_matrix = sp.zeros((1, stop - start), dtype='int')
    else:
        read_matrix = scipy.sparse.coo_matrix((sp.ones(0), ([], [])), shape = (0, stop - start), dtype='bool')

    length = stop - start

    #print >> sys.stderr, 'querying %s:%i-%i' % (chr_name, start, stop)
    ### TODO THIS IS A HACK
    if chr_name == 'MT':
        return (read_matrix, sp.zeros(shape=(0, 3), dtype='uint32'), sp.zeros(shape=(0, 3), dtype='uint32'))

    if infile.gettid(chr_name) > -1:
        ### pysam query is zero based in position (results are as well), all intervals are pythonic half open
        for read in infile.fetch(chr_name, start, stop, until_eof=True):
            
            ### check if we skip this read
            if filter_read(read, filter, spliced, mapped, strand, primary_only, var_aware, no_mm, mm_tag=mm_tag):
                continue

            tags = dict(read.tags)
            curr_read_stranded = ('XS' in tags)
            is_minus = False
            if curr_read_stranded:
                is_minus = (tags['XS'] == '-')
 
            ### get introns and coverage
            p = read.pos 
            for o in read.cigar:
                if o[0] == 3:
                    if is_minus:
                        try:
                            introns_m[(p, p + o[1])] += 1
                        except KeyError:
                            introns_m[(p, p + o[1])] = 1
                    else:
                        try:
                            introns_p[(p, p + o[1])] += 1
                        except KeyError:
                            introns_p[(p, p + o[1])] = 1
                if o[0] in [0, 2]:
                    _start = int(max(p-start, 0))
                    _stop = int(min(p + o[1] - start, stop - start))
                    if _stop < 0 or _start > length:
                        if o[0] in [0, 2, 3]:
                            p += o[1]
                        continue
                    if collapse:
                        read_matrix[0, _start:_stop] += 1
                    else:
                        r = sp.arange(_start, _stop)
                        i.extend([read_cnt] * len(r))
                        j.extend(r)
                        #for pp in range(p, p + o[1]):
                        #    if pp - start >= 0 and pp < stop:
                        #        i.append(read_cnt)
                        #        j.append(pp - start)
                if o[0] in [0, 2, 3]:
                    p += o[1]

            ### the following is new behavior and is going to come in the next version --> deletions are not counted towards coverage
            #### get coverage
            #for p in read.positions:
            #    if p - start >= 0:
            #        if p >= stop:
            #            break
            #        else:
            #            i.append(read_cnt)
            #            j.append(p - start)

            read_cnt += 1

        ### construct sparse matrix
        if not collapse:
            try:
                i = sp.array(i, dtype='int')
                j = sp.array(j, dtype='int')
                read_matrix = scipy.sparse.coo_matrix((sp.ones(i.shape[0]), (i, j)), shape = (read_cnt, stop - start), dtype='bool')
            except ValueError:
                step = 1000000
                _k = step
                assert len(i) > _k
                read_matrix = scipy.sparse.coo_matrix((sp.ones(_k), (i[:_k], j[:_k])), shape = (read_cnt, stop - start), dtype='bool')
                while _k < len(i):
                    _l = min(len(i), _k + step)
                    read_matrix += scipy.sparse.coo_matrix((sp.ones(_l - _k), (i[_k:_l], j[_k:_l])), shape = (read_cnt, stop - start), dtype='bool')                
                    _k = _l

    ### convert introns into scipy array
    if len(introns_p) >= 1:
        introns_p = sp.array([[k[0], k[1], v] for k, v in introns_p.items()], dtype='uint32')
        introns_p = sort_rows(introns_p)
    else:
        introns_p = sp.zeros(shape=(0, 3), dtype='uint32')
    if len(introns_m) >= 1:
        introns_m = sp.array([[k[0], k[1], v] for k, v in introns_m.items()], dtype='uint32')
        introns_m = sort_rows(introns_m)
    else:
        introns_m = sp.zeros(shape=(0, 3), dtype='uint32')

    return (read_matrix, introns_p, introns_m) 
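
## --- usage sketch (not part of the original module) -------------------------
## A hedged example of calling get_reads on a (hypothetical) indexed BAM file;
## it assumes this module and its helpers (filter_read, sort_rows) are
## importable and that pysam can open the file.
# cov, introns_p, introns_m = get_reads('sample1.bam', 'chr1', 100000, 101000,
#                                       collapse=True)
# cov.shape        # (1, 1000): summed coverage over the queried window
# introns_p.shape  # (n_plus_strand_introns, 3): start, stop, supporting reads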
Exemplo n.º 55
0
def simulate_data(N=200,
                  seed=1234567,
                  views=["0", "1", "2", "3"],
                  D=[500, 200, 500, 200],
                  noise_level=1,
                  K=4,
                  G=1,
                  lscales=[0.2, 0.8, 0.0, 0.0],
                  sample_cov="equidistant",
                  scales=[1, 0.8, 0, 0],
                  shared=True,
                  plot=False):
    """
    Function to simulate test data for MOFA (without ARD or spike-and-slab on factors)

    N: Number of time points/ samples per group
    seed: seed to use for simulation
    views: list of view names
    K: number of factors
    G: Number of groups
    D: list of number of features per view (same length as views)
    noise_level: variance of the residuals (1/tau);
                 per feature it is multiplied by a uniform random number in [0.5, 1.5] to model differences in features' noise
    scales, lscales: hyperparameters of the GP per factor (length as given by K)
    sample_cov: sample covariates to use (can be of shape N x C), "equidistant", or None
    shared: a list (or a single boolean) indicating for each factor whether it is perfectly shared across groups.
    For non-shared factors, pairwise group-group correlations are drawn at random (from the outer product of uniform draws).
    Only relevant for factors with lengthscale and scale > 0.
    plot: If True, simulation results are plotted
    """

    # simulate some test data
    np.random.seed(seed)
    M = len(views)
    N = int(N)
    if type(shared) == bool:
        shared = [shared]
    if len(shared) == 1:
        shared = shared * K

    groupidx = np.repeat(range(G), N)  # kronecker structure
    if not sample_cov is None:
        if sample_cov == "equidistant":
            sample_cov = np.linspace(0, 1, N)
            sample_cov = sample_cov.reshape(N, 1)
        else:
            assert sample_cov.shape[
                0] == N, "Number of rows of sample_cov and N does not match"
            if len(sample_cov.shape) == 1:
                sample_cov = sample_cov.reshape(N, 1)
        distC = SS.distance.pdist(sample_cov, 'euclidean')**2.
        distC = SS.distance.squareform(distC)

    else:
        lscales = [0] * K

    Gmats = []
    for k in range(K):
        if scales[k] == 0 or lscales[k] == 0:  # group structure not modelled
            Gmat = np.eye(G)
        else:
            if shared[k]:
                Gmat = np.ones([G, G])
            else:
                x = np.random.uniform(-1, 1, G)
                Gmat = np.outer(x, x) + 0.5 * np.eye(G)
                Gmat = covar_to_corr(Gmat)
        Gmats.append(Gmat)

    # simulate Sigma
    Sigma = []
    for k in range(K):
        if lscales[k] > 0:
            Kmat = scales[k] * np.exp(-distC / (2 * lscales[k]**2))
            Kmat = np.kron(Gmats[k], Kmat)
            Sigma.append(Kmat + (1 - scales[k]) * np.eye(N * G))
        elif lscales[k] == 0:
            Kmat = scales[k] * (distC == 0).astype(float)
            Kmat = np.kron(Gmats[k], Kmat)
            Sigma.append(Kmat + (1 - scales[k]) * np.eye(N * G))
            # Sigma.append(np.eye(N*G))
        else:
            sys.exit("All lengthscales need to be non-negative")

    # plot covariance structure
    if plot:
        fig, axs = plt.subplots(1, K, sharex=True, sharey=True)
        for k in range(K):
            sns.heatmap(Sigma[k], ax=axs[k])

    # simulate factor values
    Zks = []
    for k in range(K):
        sig = Sigma[k]
        Zks.append(np.random.multivariate_normal(np.zeros(N * G), sig, 1))
    Zks = np.vstack(Zks).transpose()

    Z = []
    for g in range(G):
        Z.append(Zks[groupidx == g, ])

    # simulate alpha and theta, each factor should be active in at least one view
    inactive = 1000
    active = 1
    theta = 0.5 * np.ones([M, K])
    alpha_tmp = [s.ones(M) * inactive] * K
    for k in range(K):
        while s.all(alpha_tmp[k] == inactive):
            alpha_tmp[k] = s.random.choice([active, inactive],
                                           size=M,
                                           replace=True)
    alpha = [s.array(alpha_tmp)[:, m] for m in range(M)]

    # simulate weights
    W = []
    for m in range(M):
        W.append(
            np.column_stack([
                np.random.normal(0, np.sqrt(1 / alpha[m][k]), D[m]) *
                np.random.binomial(1, theta[m][k], D[m]) for k in range(K)
            ]))

    # simulate heteroscedastic noise
    noise = []
    for m in range(M):
        tau_m = stats.uniform.rvs(
            loc=0.5, scale=1, size=D[m]
        ) * 1 / noise_level  # uniform between 0.5 and 1.5 scaled by noise level
        noise.append(
            np.random.multivariate_normal(np.zeros(D[m]),
                                          np.eye(D[m]) * 1 / tau_m, N))

    # generate data
    data = []
    for m in range(M):
        tmp = []
        for g in range(G):
            tmp.append(Z[g].dot(W[m].transpose()) + noise[m])
        data.append(tmp)

    # store as list of groups
    if not sample_cov is None:
        sample_cov = [sample_cov] * G

    return {
        'data': data,
        'W': W,
        'Z': Z,
        'noise': noise,
        'sample_cov': sample_cov,
        'Sigma': Sigma,
        'views': views,
        'lscales': lscales,
        'N': N,
        'Gmats': Gmats
    }
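
## --- usage sketch (not part of the original module) -------------------------
## A hedged example of how simulate_data might be used; it assumes the module's
## other imports (np, SS, stats, s, covar_to_corr, ...) are available.
# sim = simulate_data(N=50, G=2, K=4, views=["0", "1"], D=[100, 50],
#                     lscales=[0.2, 0.8, 0.0, 0.0], scales=[1, 0.8, 0, 0])
# sim['data'][0][1].shape  # (50, 100): view "0", group 1
# sim['Z'][0].shape        # (50, 4): latent factors for group 0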
Exemplo n.º 56
0
def get_LDpred_ld_tables(snps, ld_radius=100, ld_window_size=0, h2=None, n_training=None, gm=None, gm_ld_radius=None):
    """
    Calculates LD tables, and the LD score in one go...
    """
    
    ld_dict = {}
    m, n = snps.shape
    print(m, n)
    ld_scores = sp.ones(m)
    ret_dict = {}
    if gm_ld_radius is None:
        for snp_i, snp in enumerate(snps):
            # Calculate D
            start_i = max(0, snp_i - ld_radius)
            stop_i = min(m, snp_i + ld_radius + 1)
            X = snps[start_i: stop_i]
            D_i = sp.dot(snp, X.T) / n
            r2s = D_i ** 2
            ld_dict[snp_i] = D_i
            lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
            ld_scores[snp_i] = lds_i
    else:
        assert gm is not None, 'Genetic map is missing.'
        window_sizes = []
        ld_boundaries = []
        for snp_i, snp in enumerate(snps):
            curr_cm = gm[snp_i] 
            
            # Now find lower boundary
            start_i = snp_i
            min_cm = gm[snp_i]
            while start_i > 0 and min_cm > curr_cm - gm_ld_radius:
                start_i = start_i - 1
                min_cm = gm[start_i]
            
            # Now find the upper boundary
            stop_i = snp_i
            max_cm = gm[snp_i]
            while stop_i > 0 and max_cm < curr_cm + gm_ld_radius:
                stop_i = stop_i + 1
                max_cm = gm[stop_i]
            
            ld_boundaries.append([start_i, stop_i])    
            curr_ws = stop_i - start_i
            window_sizes.append(curr_ws)
            assert curr_ws > 0, 'Some issues with the genetic map'

            X = snps[start_i: stop_i]
            D_i = sp.dot(snp, X.T) / n
            r2s = D_i ** 2
            ld_dict[snp_i] = D_i
            lds_i = sp.sum(r2s - (1 - r2s) / (n - 2), dtype='float32')
            ld_scores[snp_i] = lds_i
        
        avg_window_size = sp.mean(window_sizes)
        print('Average # of SNPs in LD window was %0.2f' % avg_window_size)
        if ld_window_size == 0:
            ld_window_size = int(avg_window_size * 2)
        ret_dict['ld_boundaries'] = ld_boundaries
    ret_dict['ld_dict'] = ld_dict
    ret_dict['ld_scores'] = ld_scores
    
    if ld_window_size > 0:
        ref_ld_matrices = []
        inf_shrink_matrices = []
        for wi in range(0, m, ld_window_size):
            start_i = wi
            stop_i = min(m, wi + ld_window_size)
            curr_window_size = stop_i - start_i
            X = snps[start_i: stop_i]
            D = sp.dot(X, X.T) / n
            ref_ld_matrices.append(D)
            if h2 != None and n_training != None:
                A = ((m / h2) * sp.eye(curr_window_size) + (n_training / (1)) * D)
                A_inv = linalg.pinv(A)
                inf_shrink_matrices.append(A_inv)
        ret_dict['ref_ld_matrices'] = ref_ld_matrices
        if h2 != None and n_training != None:
            ret_dict['inf_shrink_matrices'] = inf_shrink_matrices
    return ret_dict
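
## --- usage sketch (not part of the original module) -------------------------
## A hedged example on random data: rows are SNPs, columns are individuals, and
## each SNP is standardised beforehand, as the D = X X^T / n formula above
## assumes; `linalg` is the scipy.linalg import used inside the function.
# m, n = 50, 200
# snps = sp.random.random((m, n))
# snps = (snps - snps.mean(axis=1)[:, None]) / snps.std(axis=1)[:, None]
# tables = get_LDpred_ld_tables(snps, ld_radius=10, ld_window_size=25,
#                               h2=0.3, n_training=n)
# tables['ld_scores'][:5]         # per-SNP LD scores
# len(tables['ref_ld_matrices'])  # one LD matrix per window of 25 SNPs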
Exemplo n.º 57
0
def flood(im, regions=None, mode='max'):
    r"""
    Floods/fills each region in an image with a single value based on the
    specific values in that region.  The ``mode`` argument is used to
    determine how the value is calculated.

    Parameters
    ----------
    im : array_like
        An ND image containing isolated non-zero regions, with 0's elsewhere.

    regions : array_like
        An array the same shape as ``im`` with each region labeled.  If None is
        supplied (default) then ``scipy.ndimage.label`` is used with its
        default arguments.

    mode : string
        Specifies how to determine which value should be used to flood each
        region.  Options are:

    *'max'* : Floods each region with the local maximum in that region

    *'min'* : Floods each region with the local minimum in that region

    *'size'* : Floods each region with the size of that region

    Returns
    -------
    An ND-array the same size as ``im`` with new values placed in each
    foreground voxel based on the ``mode``.

    See Also
    --------
    props_to_image

    """
    mask = im > 0
    if regions is None:
        labels, N = spim.label(mask)
    else:
        labels = sp.copy(regions)
        N = labels.max()
    I = im.flatten()
    L = labels.flatten()
    if mode.startswith('max'):
        V = sp.zeros(shape=N + 1, dtype=float)
        for i in range(len(L)):
            if V[L[i]] < I[i]:
                V[L[i]] = I[i]
    elif mode.startswith('min'):
        V = sp.ones(shape=N + 1, dtype=float) * sp.inf
        for i in range(len(L)):
            if V[L[i]] > I[i]:
                V[L[i]] = I[i]
    elif mode.startswith('size'):
        V = sp.zeros(shape=N + 1, dtype=int)
        for i in range(len(L)):
            V[L[i]] += 1
    im_flooded = sp.reshape(V[labels], newshape=im.shape)
    im_flooded = im_flooded * mask
    return im_flooded
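
## --- usage sketch (not part of the original module) -------------------------
## A hedged example: label two separate blobs in a small 2-D image and flood
## each with its size; `sp` and `spim` (scipy.ndimage) are assumed to be the
## module-level imports used above.
# im = sp.zeros((8, 8))
# im[1:3, 1:3] = 1.0   # a 2x2 region
# im[5:8, 5:8] = 2.0   # a 3x3 region
# sizes = flood(im, mode='size')
# sizes.max()          # -> 9, the size of the larger region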
Exemplo n.º 58
0
def hierarchical_kmeans_w_mlc(
    feat_mat,
    mlc_mats: list,
    use_freq,
    max_leaf_size=100,
    imbalanced_ratio=0.0,
    imbalanced_depth=100,
    spherical=True,
    seed=0,
    max_iter=20,
    threads=-1,
):
    """

    Parameters
    ----------
    feat_mat
    mlc_mats: list
        list of must link constraint matrix
    use_freq
    max_leaf_size
    imbalanced_ratio
    imbalanced_depth
    spherical
    seed
    max_iter
    threads

    Returns
    -------

    """

    global run_kmeans

    def run_kmeans(cluster, c1, c2, min_size, max_iter, spherical=True):
        if point_freq_global is None:
            indexer = kmeans(feat_mat_global[cluster], None, c1, c2, min_size,
                             max_iter, spherical)
        else:
            indexer = kmeans(
                feat_mat_global[cluster],
                point_freq_global[cluster],
                c1,
                c2,
                min_size,
                max_iter,
                spherical,
            )
        return cluster[indexer], cluster[~indexer]

    global kmeans

    def kmeans(feat_mat,
               freqs,
               c1=-1,
               c2=-1,
               min_size=50,
               max_iter=20,
               spherical=True):
        if c1 == -1:
            c1, c2 = sp.random.randint(feat_mat.shape[0]), sp.random.randint(
                1, feat_mat.shape[0])
        c1, c2 = feat_mat[c1], feat_mat[(c1 + c2) % feat_mat.shape[0]]
        old_indexer = sp.ones(feat_mat.shape[0]) * -1

        for _ in range(max_iter):
            scores = sp.squeeze(sp.asarray(feat_mat.multiply(c1 - c2).sum(1)))

            if freqs is None:
                indexer = get_split_wo_freq(scores=scores, min_size=min_size)
            else:
                indexer = get_split_w_freq(scores=scores,
                                           min_size=min_size,
                                           freqs=freqs)

            if sp.array_equal(indexer, old_indexer):
                break
            old_indexer = indexer
            c1 = feat_mat[indexer].sum(0)
            c2 = feat_mat[~indexer].sum(0)
            if spherical:
                c1 = normalize(c1)
                c2 = normalize(c2)
        return indexer

    global feat_mat_global, point_freq_global
    feat_mat_global = feat_mat
    point_freq_global = None

    random = sp.random.RandomState(seed)
    cluster_chain = []
    clusters_big, clusters_small = [], []
    if feat_mat.shape[0] > max_leaf_size:
        clusters_big.append(sp.arange(feat_mat.shape[0]))
    else:
        clusters_small.append(sp.arange(feat_mat.shape[0]))

    while (
            len(clusters_big) > 0
    ):  # Iterate until there is at least one cluster with > max_leaf_size nodes

        curr_level = len(cluster_chain)
        # Do balanced clustering beyond imbalanced_depth to ensure reasonably timely termination
        if curr_level >= imbalanced_depth:
            imbalanced_ratio = 0

        # Enforce must-link constraints by merging points into connected components
        if curr_level >= len(mlc_mats):
            """If there are no must-link constraints for this level onward, then append an identity matrix which
            says that the trivial thing that every point must link to itself!"""
            n = feat_mat.shape[0]
            mlc_mats.append(
                smat.csr_matrix(smat.diags(np.ones((n)), shape=(n, n))))

        clusters_big_cc = []
        feat_mat_cc = []
        cum_idx_cc = 0
        old_cc_to_new_cc = np.zeros((mlc_mats[curr_level].shape[1])) - 1
        new_cc_to_old_cc = np.zeros((mlc_mats[curr_level].shape[1])) - 1
        num_points_per_cc = []
        for cluster in clusters_big:

            # Get constraints mat and features mat rows for this cluster
            local_feat_mat = feat_mat[cluster]
            local_mlc_mat = mlc_mats[curr_level][cluster]

            # Find # non zero cols in local_mlc_mat. That'll be # conn components(= num_CC) over points in cluster
            num_points = len(cluster)
            non_zero_cols = np.diff(local_mlc_mat.tocsc().indptr).nonzero()[0]
            num_CC = non_zero_cols.shape[0]

            # Retain only non-zero cols in local_mlc_mat. Now it should be of shape num_points x num_CC
            local_mlc_mat = local_mlc_mat[:, non_zero_cols]
            local_num_points_per_cc = np.array(
                np.sum(local_mlc_mat.ceil(), axis=0, dtype=int)).reshape(-1)

            # Get feature vec for each conn component using points in that conn comp.
            # (# conn comp x # points) x (# points x # features) --> ( # conn comp x # features )
            local_feat_mat_w_mlc = local_mlc_mat.transpose() * local_feat_mat
            feat_mat_cc.append(local_feat_mat_w_mlc)
            num_points_per_cc.append(local_num_points_per_cc)

            assert local_mlc_mat.shape == (num_points, num_CC)
            assert local_feat_mat.shape == (num_points, feat_mat.shape[1])
            assert local_feat_mat_w_mlc.shape == (num_CC, feat_mat.shape[1])
            """ Assert that each cols sums to one, and sum of total matrix is equal to num_CC.
             This is important for correctness when getting conn comp vector using point vectors. """
            assert (np.round(np.sum(local_mlc_mat, axis=0)) == np.ones(
                (1, num_CC))).all()
            assert int(np.round(np.sum(local_mlc_mat))) == num_CC
            """ Give indices to each conn comp, offsetting it using cum_idx_cc which keeps track 
                of # conn comp so far, and add this list to cluster_big_cc """
            cc_idxs = np.arange(num_CC) + cum_idx_cc
            clusters_big_cc.append(cc_idxs)

            old_cc_to_new_cc[non_zero_cols] = cc_idxs
            new_cc_to_old_cc[cc_idxs] = non_zero_cols

            cum_idx_cc += num_CC

        feat_mat_global_cc = smat.csr_matrix(smat.vstack(feat_mat_cc))
        if use_freq:
            point_freq_global = np.concatenate(num_points_per_cc).reshape(-1)
            assert point_freq_global.shape == (feat_mat_global_cc.shape[0], )

        clusters_big = clusters_big_cc
        feat_mat_global = feat_mat_global_cc
        LOGGER.info("Shape of new  global feat matrix = {}".format(
            feat_mat_global.shape))

        num_parent_clusters = len(clusters_big) + len(clusters_small)
        new_clusters_big = []
        new_clusters_small = []
        cols_big, cols_small = [], [
            x + len(clusters_big) for x in range(len(clusters_small))
        ]
        seeds = [(random.randint(s), random.randint(1, s))
                 for s in map(len, clusters_big)]
        min_sizes = [
            int(s * (0.5 - imbalanced_ratio)) for s in map(len, clusters_big)
        ]

        with mp.Pool(threads if threads > 0 else mp.cpu_count()) as p:
            for col, child_clusters in enumerate(
                    p.starmap(
                        run_kmeans,
                        zip(
                            clusters_big,
                            *map(list, zip(*seeds)),
                            min_sizes,
                            repeat(max_iter),
                            repeat(spherical),
                        ),
                    )):
                for cluster_cc in child_clusters:
                    """cluster is a list of connected component indices.
                    Convert this list to list of indices of points in these connected components"""
                    # Map new conn_comp indices to old conn_comp indices
                    cluster_cc = new_cc_to_old_cc[cluster_cc].astype(int)  # cast to int for sparse column selection

                    # Get mlc matrix with only cols restricted to current list of conn components
                    local_mlc_mat = mlc_mats[curr_level][:, cluster_cc]
                    assert local_mlc_mat.shape == (feat_mat.shape[0],
                                                   len(cluster_cc))

                    # Get points in these conn components, which have non zero value in their corresponding row
                    cluster = np.diff(local_mlc_mat.indptr).nonzero()[0]
                    if len(cluster) > max_leaf_size and len(cluster_cc) > 1:
                        new_clusters_big.append(cluster)
                        cols_big.append(col)
                    elif len(cluster) > max_leaf_size and len(cluster_cc) == 1:
                        """Add to small clusters, even though this cluster has more than max_leaf_size points
                        because this cluster has just one connected component and thus cannot be split further due
                        to must-link constraints
                        """
                        new_clusters_small.append(cluster)
                        cols_small.append(col)
                    elif len(cluster) > max_leaf_size and len(cluster_cc) == 0:
                        # This condition is not possible but still having this for a sanity check
                        raise NotImplementedError
                    elif len(cluster) > 0:
                        new_clusters_small.append(cluster)
                        cols_small.append(col)
                    # else: # Do not raise error when a cluster is empty.
                    #     raise NotImplementedError

        cols = cols_big + cols_small

        clusters_small.extend(new_clusters_small)

        curr_clust_mat = smat.csc_matrix(
            (sp.ones(len(cols)), (range(len(cols)), cols)),
            shape=(len(new_clusters_big + clusters_small),
                   num_parent_clusters),
            dtype=sp.float32,
        )
        cluster_chain.append(curr_clust_mat)

        clusters_big = new_clusters_big

        LOGGER.info("Cluster chain shape at level = {} is {}".format(
            curr_level, curr_clust_mat.shape))

    C = []
    for col, cluster in enumerate(chain(clusters_big, clusters_small)):
        for row in cluster:
            C.append((row, col))

    cluster_mat_cc = smat.csc_matrix(
        (sp.ones(feat_mat.shape[0]), list(map(list, zip(*C)))),
        shape=(feat_mat.shape[0], len(clusters_big) + len(clusters_small)),
        dtype=sp.float32,
    )

    cluster_mat = smat.csc_matrix(mlc_mats[-1] * cluster_mat_cc,
                                  dtype=sp.float32)
    cluster_chain.append(cluster_mat)
    LOGGER.info("Cluster chain shape at final level is {}".format(
        cluster_mat.shape))
    return cluster_chain
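
## --- usage sketch (not part of the original module) -------------------------
## A hedged illustration of the expected inputs: a sparse feature matrix plus
## one must-link constraint matrix per level (an identity matrix means "no
## constraints", and its columns sum to 1 as the asserts above require). The
## module's other helpers (get_split_wo_freq, get_split_w_freq, normalize,
## LOGGER, smat, np, sp, mp, ...) are assumed to be importable.
# n = 1000
# feat_mat = smat.csr_matrix(sp.random.random((n, 64)))
# identity_mlc = smat.csr_matrix(smat.eye(n))
# chain_ = hierarchical_kmeans_w_mlc(feat_mat, [identity_mlc], use_freq=False,
#                                    max_leaf_size=100)
# [c.shape for c in chain_]  # one assignment matrix per level of the hierarchy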
Exemplo n.º 59
0
def parallel_compute_ll_matrix(gp, bounds, num_pts, num_proc=None):
    """Compute matrix of the log likelihood over the parameter space in parallel.
    
    Parameters
    ----------
    gp : object
        The Gaussian process (or similar model exposing ``free_params``) whose
        log likelihood is evaluated over the grid.
    bounds : 2-tuple or list of 2-tuples with length equal to the number of free parameters
        Bounds on the range to use for each of the parameters. If a single
        2-tuple is given, it will be used for each of the parameters.
    num_pts : int or list of ints with length equal to the number of free parameters
        The number of points to use for each parameters. If a single int is
        given, it will be used for each of the parameters.
    num_proc : Positive int or None, optional
        Number of processes to run the parallel computation with. If set to
        None, ALL available cores are used. Default is None (use all available
        cores).
    
    Returns
    -------
    ll_vals : array
        The log likelihood for each of the parameter possibilities.
    param_vals : list of array
        The parameter values used.
    """
    if num_proc is None:
        num_proc = multiprocessing.cpu_count()

    present_free_params = gp.free_params

    bounds = scipy.atleast_2d(scipy.asarray(bounds, dtype=float))
    if bounds.shape[1] != 2:
        raise ValueError("Argument bounds must have shape (n, 2)!")
    # If bounds is a single tuple, repeat it for each free parameter:
    if bounds.shape[0] == 1:
        bounds = scipy.tile(bounds, (len(present_free_params), 1))
    # If num_pts is a single value, use it for all of the parameters:
    try:
        iter(num_pts)
    except TypeError:
        num_pts = num_pts * scipy.ones(bounds.shape[0], dtype=int)
    else:
        num_pts = scipy.asarray(num_pts, dtype=int)
        if len(num_pts) != len(present_free_params):
            raise ValueError(
                "Length of num_pts must match the number of free parameters of kernel!"
            )

    # Form arrays to evaluate parameters over:
    param_vals = []
    for k in range(0, len(present_free_params)):
        param_vals.append(
            scipy.linspace(bounds[k, 0], bounds[k, 1], num_pts[k]))

    pv_cases = list()
    gp_cases = list()
    num_pts_cases = list()
    for k in range(0, len(param_vals[0])):
        specific_param_vals = list(param_vals)
        specific_param_vals[0] = param_vals[0][k]
        pv_cases.append(specific_param_vals)

        gp_cases += [copy.deepcopy(gp)]

        num_pts_cases.append(num_pts)

    pool = multiprocessing.Pool(processes=num_proc)
    try:
        vals = scipy.asarray(
            pool.map(_compute_ll_matrix_wrapper,
                     zip(gp_cases, pv_cases, num_pts_cases)))
    finally:
        pool.close()

    return (vals, param_vals)
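
## --- usage sketch (not part of the original module) -------------------------
## A hedged example of the calling convention; `gp` stands for any fitted
## gptools-style Gaussian process exposing `free_params`, and the helper
## _compute_ll_matrix_wrapper is assumed to be defined alongside this function.
# vals, param_vals = parallel_compute_ll_matrix(gp, bounds=(0.1, 5.0),
#                                               num_pts=20, num_proc=4)
# vals.shape  # one entry per point of the grid over the free parameters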
Exemplo n.º 60
0
data_subsample = data.subsample_phenotypes(phenotype_query=phenotype_query,intersection=True)

#get variables we need from data
snps = data_subsample.getGenotypes(impute_missing=True)
phenotypes,sample_idx = data_subsample.getPhenotypes(phenotype_query=phenotype_query,intersection=True); assert sample_idx.all()

sample_relatedness = data_subsample.getCovariance()
pos = data_subsample.getPos()

#set parameters for the analysis
N, P = phenotypes.shape          

covs = None                 #covariates
Acovs = None                #the design matrix for the covariates   
Asnps = SP.ones((1,P))      #the design matrix for the SNPs
K1r = sample_relatedness    #the first sample-sample covariance matrix (non-noise)
K2r = SP.eye(N)             #the second sample-sample covariance matrix (noise)
K1c = None                  #the first phenotype-phenotype covariance matrix (non-noise)
K2c = None                  #the second phenotype-phenotype covariance matrix (noise)
covar_type = 'freeform'     #the type of covariance matrix to be estimated for unspecified covariances 
searchDelta = False         #specify if delta should be optimized for each SNP
test="lrt"                  #specify type of statistical test

# Running the analysis
# when cov are not set (None), LIMIX considers an intercept (covs=SP.ones((N,1)))
lmm, pvalues = QTL.test_lmm_kronecker(snps,phenotypes.values,covs=covs,Acovs=Acovs,Asnps=Asnps,K1r=K1r,trait_covar_type=covar_type)

#convert P-values to a DataFrame for nice output writing:
pvalues = pd.DataFrame(data=pvalues.T,index=data_subsample.geno_ID,columns=['YJR139C'])
pvalues = pd.concat([pos,pvalues],join="outer",axis=1)
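
# a hedged follow-up: persist the merged position / P-value table
# (the output filename is hypothetical)
# pvalues.to_csv('YJR139C_qtl_pvalues.tsv', sep='\t', index=True)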