Example #1
 def _findRobots(self):
     """ Finds the robots amoung the edges found
     """
     ## for each right edge, find the next closest left edge; this forms an edge pair that could be a robot
     self.Robots = list()
     if len(self.RightEdges) == 0 or len(self.LeftEdges) == 0:
         return
         
     for rightedge in self.RightEdges:
         leftedge = self.LeftEdges[0]
         i = 1
         while leftedge < rightedge:
             if i >= len(self.LeftEdges):
                 break
             leftedge = self.LeftEdges[i]
             i = i + 1
             
         ## now calculate the distance between the two edges
         distance = self.__calculateDistanceBetweenEdges(leftedge, rightedge)
         
         if distance > self.MINIMUM_NAO_WIDTH and distance < self.MAXIMUM_NAO_WIDTH:
             x = self.CartesianData[0,rightedge:leftedge+1]
             y = self.CartesianData[1,rightedge:leftedge+1]
             r = self.PolarData[0,rightedge:leftedge+1]
             c = numpy.less(r, 409.5)
             x = numpy.compress(c, x)
             y = numpy.compress(c, y)                
             robotx = self.__averageObjectDistance(x)
             roboty = self.__averageObjectDistance(y)
             c = numpy.logical_and(numpy.less(numpy.fabs(x - robotx), self.MAXIMUM_NAO_WIDTH), numpy.less(numpy.fabs(y - roboty), self.MAXIMUM_NAO_WIDTH))
             x = numpy.compress(c, x)
             y = numpy.compress(c, y)
             robotr = math.sqrt(robotx**2 + roboty**2)
             robotbearing = math.atan2(roboty, robotx)
             self.Robots.append(Robot(robotx, roboty, robotr, robotbearing, x, y))
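For reference, numpy.compress keeps only the elements at positions where a boolean condition is true, and reusing one mask across several arrays keeps them aligned. A minimal standalone sketch of the filtering idiom used above, with made-up values:

import numpy

x = numpy.array([100.0, 250.0, 400.0, 520.0])
y = numpy.array([80.0, 120.0, 90.0, 300.0])
r = numpy.sqrt(x**2 + y**2)
c = numpy.less(r, 409.5)     # boolean condition, as in the example
x = numpy.compress(c, x)     # keeps x[i] where c[i] is True
y = numpy.compress(c, y)     # the same mask keeps the (x, y) pairs aligned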
Example #2
    def utest( self, score ):
        """
        Gives the Mann-Whitney U test probability that the score is
        random.  See:

        Mason & Graham (2002) Areas beneath the relative operating
        characteristics (ROC) and relative operating levels (ROL)
        curves: Statistical significance and interpretation

        Note (1): P-values below ~1e-16 are reported as 0.0.
        See zprob() in Biskit.Statistics.stats!

        Note (2): the P-value does not distinguish between positive
        and negative deviations from random -- a ROC area of 0.1 will
        get the same P-value as a ROC area of 0.9.

        @param score: the score predicted for each item
        @type  score: [ float ]

        @return: 1-tailed P-value
        @rtype: float
        """
        sample1 = N.compress( self.positives, score )
        sample1 = sample1[-1::-1]  # invert order

        sample2 = N.compress( N.logical_not( self.positives ), score )
        sample2 = sample2[-1::-1]  # invert order

        sample1 = sample1.tolist()
        sample2 = sample2.tolist()

        p = stats.mannwhitneyu( sample1, sample2 )
        return p[1]
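A self-contained sketch of the same pattern. Assumptions: N is numpy, and scipy.stats.mannwhitneyu stands in for the Biskit stats module (recent SciPy versions return a two-sided P-value by default):

import numpy as np
from scipy import stats   # stand-in for Biskit.Statistics.stats

positives = np.array([1, 0, 1, 1, 0, 0, 1, 0])
score = np.array([0.9, 0.2, 0.8, 0.7, 0.4, 0.1, 0.6, 0.3])
sample1 = np.compress(positives, score)                   # scores of the positives
sample2 = np.compress(np.logical_not(positives), score)   # scores of the negatives
u, p = stats.mannwhitneyu(sample1, sample2)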
Example #3
def whiskers_and_fliers(x, q1, q3, transformout=None):
    wnf = {}
    if transformout is None:
        transformout = lambda x: x

    iqr = q3 - q1
    # get low extreme
    loval = q1 - (1.5 * iqr)
    whislo = np.compress(x >= loval, x)
    if len(whislo) == 0 or np.min(whislo) > q1:
        whislo = q1
    else:
        whislo = np.min(whislo)

    # get high extreme
    hival = q3 + (1.5 * iqr)
    whishi = np.compress(x <= hival, x)
    if len(whishi) == 0 or np.max(whishi) < q3:
        whishi = q3
    else:
        whishi = np.max(whishi)

    wnf['fliers'] = np.hstack([
        transformout(np.compress(x < whislo, x)),
        transformout(np.compress(x > whishi, x))
    ])
    wnf['whishi'] = transformout(whishi)
    wnf['whislo'] = transformout(whislo)

    return wnf
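A usage sketch, assuming the function above is in scope and taking the quartiles from numpy.percentile:

import numpy as np

x = np.random.lognormal(mean=0.0, sigma=0.75, size=200)
q1, q3 = np.percentile(x, [25, 75])
wnf = whiskers_and_fliers(x, q1, q3)
print(wnf['whislo'], wnf['whishi'], len(wnf['fliers']))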
Example #4
def doit(input_file, output_file, regularization, wants_normalization):
	# Read the entire file.
	data= tuple(tuple(map(float, line.split(','))) for line in input_file)
	if len(data) == 0:
		print("no data", file=sys.stderr)
		return

	# Create X and Y indices.  Assume the last column contains the output and
	# the rest contain the inputs.
	y_index= len(data[0]) - 1
	x_indices= tuple(range(y_index))

	# Create and print the model parameters, normalizing the data if requested.
	data= np.array(data)
	x= np.compress(as_bools(x_indices), data, 1)
	mu= list(it.repeat(0.0, x.shape[1]))
	sigma= list(it.repeat(1.0, x.shape[1]))
	if wants_normalization:
		for i in range(x.shape[1]):
			mu[i]= np.mean(x[:,i])
			sigma[i]= np.std(x[:,i])
			if sigma[i] == 0.0:
				sigma[i]= 1.0
			x[:,i]= (x[:,i] - mu[i]) / sigma[i]
	y= np.compress(as_bools(y_index), data, 1).squeeze()
	model= MinimizedModel(x, y, regularization, mu, sigma)
	print(model, file=output_file)
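The helper as_bools is not shown in this example. A plausible reconstruction (an assumption, not the project's actual code) that turns column indices into a boolean selector for np.compress:

import numpy as np

def as_bools(indices, length=None):
    # Hypothetical helper: boolean mask with True at the given indices.
    indices = np.atleast_1d(indices)
    length = int(indices.max()) + 1 if length is None else length
    mask = np.zeros(length, dtype=bool)
    mask[indices] = True
    return mask

np.compress truncates the axis to the length of the condition, so as_bools(x_indices) selects exactly the input columns and as_bools(y_index) selects the single output column.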
Example #5
 def setFlaggedImageRange(self):
   (nx,ny) = self.raw_image.shape
   num_elements = nx * ny
   if self._flags_array is None:
     if not self._nan_flags_array is None:
       flags_array = self._nan_flags_array.copy()
     else:
       flags_array = numpy.zeros((nx,ny),int);
   else:
     flags_array = self._flags_array.copy()
     if not self._nan_flags_array is None:
       flags_array = flags_array + self._nan_flags_array
   flattened_flags = numpy.reshape(flags_array,(num_elements,))
   if self.raw_image.dtype == numpy.complex64 or self.raw_image.dtype == numpy.complex128:
     real_array =  self.raw_image.real
     imag_array =  self.raw_image.imag
     flattened_real_array = numpy.reshape(real_array.copy(),(num_elements,))
     flattened_imag_array = numpy.reshape(imag_array.copy(),(num_elements,))
     real_flagged_array = numpy.compress(flattened_flags == 0, flattened_real_array)
     imag_flagged_array = numpy.compress(flattened_flags == 0, flattened_imag_array)
     flagged_image = numpy.zeros(shape=real_flagged_array.shape,dtype=self.raw_image.dtype)
     flagged_image.real = real_flagged_array
     flagged_image.imag = imag_flagged_array
   else:
     flattened_array = numpy.reshape(self.raw_image.copy(),(num_elements,))
     flagged_image = numpy.compress(flattened_flags == 0, flattened_array)
   self.setImageRange(flagged_image)
Example #6
def fit_gauss_to_hist(binheights,
                     binedges,
                     binerrors,
                     fitmin=0,
                     fitmax=None,
                     p0=None,  # guesses for norm, mu, sigma
                     fitcolor="r"):

    left_binedges = binedges[:-1]
    
    if fitmax is None:
        fitmax = np.max(binedges)
    
    # cut data to values needed for fitting
    cut_mask = (left_binedges>fitmin)*(left_binedges<fitmax)
    fitx = np.compress(cut_mask, left_binedges)
    fity = np.compress(cut_mask, binheights)
    cut_binerrors = np.compress(cut_mask, binerrors)
    
    # p0 = [200.,meanguess,10.] 
    popt, pcov = scipy.optimize.curve_fit(gauss, fitx, fity,
                                          sigma=cut_binerrors,
                                          absolute_sigma=True,
                                          p0=p0)
    perr = np.sqrt(np.diag(pcov))
    # draw fitfunction
    xbase = np.linspace(fitmin,fitmax,1000)
    plt.plot(xbase, gauss(xbase, popt[0], popt[1], popt[2]),
             color=fitcolor,
             linewidth=2.) 
    print "optimized norm, mu, sigma:\n", popt
    print "corresponding errors\n", np.sqrt(np.diag(pcov))
    print "corresponding covariance matrix:\n", pcov
    return popt, pcov
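The fit model gauss is not defined in this snippet; a common three-parameter Gaussian matching the p0 = [norm, mu, sigma] ordering (an assumption about the project's helper) would be:

import numpy as np

def gauss(x, norm, mu, sigma):
    # Hypothetical helper assumed by the curve_fit call above.
    return norm * np.exp(-0.5 * ((x - mu) / sigma) ** 2)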
Example #7
 def _addChildren(self, parent_node_num, I, cur_depth, right_mask, left_mask, y):
     """Modifies self.nodes_dict, self.stack
     """
     # do the right branch
     r_tmp = numpy.compress(right_mask, y)
     if (r_tmp.shape[0] > 0): 
         # then there is a reason to add a right child
         r_node_num = self.num_nodes
         r_child = TreeNode()
         r_child.parent = parent_node_num
         r_child.constval = numpy.average(r_tmp)
         self.nodes_dict[parent_node_num].Rchild = r_node_num
         self.nodes_dict[r_node_num] = r_child
         self.stack.append( self.StackEntry(r_node_num, cur_depth+1,\
                                            numpy.compress(right_mask, I)))
         self.num_nodes += 1
    
     # do the left branch
     l_tmp = numpy.compress(left_mask, y)
     if (l_tmp.shape[0] > 0): 
         l_node_num = self.num_nodes
         l_child = TreeNode()
         l_child.parent = parent_node_num
         l_child.constval = numpy.average(l_tmp)
         self.nodes_dict[parent_node_num].Lchild = l_node_num
         self.nodes_dict[l_node_num] = l_child
         self.stack.append( self.StackEntry(l_node_num, cur_depth+1,\
                                            numpy.compress(left_mask, I)))
         self.num_nodes += 1
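A minimal sketch of the branch split with made-up values: the same boolean mask selects both the target values y and the row indices I, so the two stay aligned.

import numpy

y = numpy.array([3.0, 1.0, 4.0, 1.0, 5.0])
I = numpy.arange(len(y))
right_mask = numpy.array([1, 0, 1, 0, 1])
left_mask = numpy.logical_not(right_mask)
r_tmp = numpy.compress(right_mask, y)   # [3., 4., 5.]
r_I = numpy.compress(right_mask, I)     # [0, 2, 4]
l_tmp = numpy.compress(left_mask, y)    # [1., 1.]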
Example #8
def getMaxPoints(arr):
    # [TODO] Work out for RGB rather than array, and maybe we don't need the filter, but hopefully speeds it up.
    # Reference http://scipy-cookbook.readthedocs.io/items/FiltFilt.html
    arra = filtfilt(b,a,arr)
    maxp = maxpoints(arra, order=(len(arra)/20), mode='wrap')
    minp = minpoints(arra, order=(len(arra)/20), mode='wrap')

    points = []

    for i in range(3):
        mas = np.equal(np.greater_equal(maxp,(i*(len(arra)/3))), np.less_equal(maxp,((i+1)*len(arra)/3)))
        k = np.compress(mas[0], maxp)
        if len(k)==0:
            continue
        points.append(sum(k)/len(k))

    if len(points) == 1:
        return points, []

    points = np.compress(np.greater_equal(arra[points],(max(arra)-min(arra))*0.40 + min(arra)),points)
    rifts = []
    for i in range(len(points)-1):
        mas = np.equal(np.greater_equal(minp, points[i]),np.less_equal(minp,points[i+1]))
        k = np.compress(mas[0], minp)
        rifts.append(k[arra[k].argmin()])

    return points, rifts
Example #9
def calculate_switch_length(inheritance, positions, ignore_size=0,
                            index_only=False):
    assert inheritance.shape[0] == positions.size

    # only 1s and 2s are relevant
    exclude = np.any(inheritance < 3, axis=1)
    inh_copy = np.compress(exclude, inheritance.copy(), axis=0)

    forgiven = [forgive(col, ignore_size) for col in inh_copy.T]
    switches = [derive_position_switch_array(np.compress(fgv, col))
                for col, fgv in zip(inh_copy.T, forgiven)]

    filtered_pos = None
    if index_only:
        mean_length = [np.mean(s) for s in switches]
        medi_length = [np.median(s) for s in switches]
        maxi_length = [np.max(s) for s in switches]
    else:
        assert inheritance.shape[0] == positions.shape[0]
        pos = np.compress(exclude, positions)

        filtered_pos = [np.insert(np.take(np.compress(fgv, pos),
                                          sw.cumsum() - 1), 0, pos[0])
                        for fgv, sw in zip(forgiven, switches)]

        mean_length = np.array([np.mean(np.diff(f)) for f in filtered_pos])
        medi_length = np.array([np.median(np.diff(f)) for f in filtered_pos])
        maxi_length = np.array([np.max(np.diff(f)) for f in filtered_pos])

    return mean_length, medi_length, maxi_length, filtered_pos
Example #10
 def _locate(self, x):
     '''
     Given a possible set of color data values, return the ones
     within range, together with their corresponding colorbar
     data coordinates.
     '''
     if isinstance(self.norm, (colors.NoNorm, colors.BoundaryNorm)):
         b = self._boundaries
         xn = x
         xout = x
     else:
         # Do calculations using normalized coordinates so
         # as to make the interpolation more accurate.
         b = self.norm(self._boundaries, clip=False).filled()
         # We do our own clipping so that we can allow a tiny
         # bit of slop in the end point ticks to allow for
         # floating point errors.
         xn = self.norm(x, clip=False).filled()
         in_cond = (xn > -0.001) & (xn < 1.001)
         xn = np.compress(in_cond, xn)
         xout = np.compress(in_cond, x)
     # The rest is linear interpolation with clipping.
     y = self._y
     N = len(b)
     ii = np.minimum(np.searchsorted(b, xn), N-1)
     i0 = np.maximum(ii - 1, 0)
     #db = b[ii] - b[i0]
     db = np.take(b, ii) - np.take(b, i0)
     db = np.where(i0==ii, 1.0, db)
     #dy = y[ii] - y[i0]
     dy = np.take(y, ii) - np.take(y, i0)
     z = np.take(y, i0) + (xn-np.take(b,i0))*dy/db
     return xout, z
Example #11
    def utest( self, score ):
        """
        Gives the Mann-Whitney U test probability that the score is
        random.  See:

        Mason & Graham (2002) Areas beneath the relative operating
        characteristics (ROC) and relative operating levels (ROL)
        curves: Statistical significance and interpretation

        @param score: the score predicted for each item
        @type  score: [ float ]

        @return: 1-tailed P-value
        @rtype: float
        """
        sample1 = N.compress( self.positives, score )
        sample1 = sample1[-1::-1]  # invert order

        sample2 = N.compress( N.logical_not( self.positives ), score )
        sample2 = sample2[-1::-1]  # invert order

        sample1 = sample1.tolist()
        sample2 = sample2.tolist()

        p = stats.mannwhitneyu( sample1, sample2 )
        return p[1]
Example #12
 def _locate(self, x):
     '''
     Given a possible set of color data values, return the ones
     within range, together with their corresponding colorbar
     data coordinates.
     '''
     if isinstance(self.norm, (colors.NoNorm, colors.BoundaryNorm)):
         b = self._boundaries
         xn = x
         xout = x
     else:
         b = self.norm(self._boundaries, clip=False).filled()
         xn = self.norm(x, clip=False).filled()
         in_cond = (xn > -0.001) & (xn < 1.001)
         xn = np.compress(in_cond, xn)
         xout = np.compress(in_cond, x)
     y = self._y
     N = len(b)
     ii = np.minimum(np.searchsorted(b, xn), N-1)
     i0 = np.maximum(ii - 1, 0)
     db = np.take(b, ii) - np.take(b, i0)
     db = np.where(i0==ii, 1.0, db)
     dy = np.take(y, ii) - np.take(y, i0)
     z = np.take(y, i0) + (xn-np.take(b,i0))*dy/db
     return xout, z
Example #13
 def stochasticPartition(self, data, partition):
     '''Split the data stochastically according to soft partition'''
     #sample = numpy.random.rand(partition.shape[0]) < partition
     sample = partition
     ldata = numpy.compress(1-sample, data, axis=0)
     rdata = numpy.compress(sample, data, axis=0)
     return (numpy.asarray(ldata), numpy.asarray(rdata))
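With axis=0, numpy.compress selects whole rows, so complementary masks split a matrix into two disjoint row sets. A standalone sketch:

import numpy

data = numpy.arange(10).reshape(5, 2)
partition = numpy.array([0, 1, 1, 0, 1])
ldata = numpy.compress(1 - partition, data, axis=0)   # rows where partition == 0
rdata = numpy.compress(partition, data, axis=0)       # rows where partition == 1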
Example #14
    def create_projection_as_numeric_array_3D(self, attr_indices, **settings_dict):
        valid_data = settings_dict.get("valid_data")
        class_list = settings_dict.get("class_list")
        jitter_size = settings_dict.get("jitter_size", 0.0)

        if valid_data is None:
            valid_data = self.get_valid_list(attr_indices)
        if sum(valid_data) == 0:
            return None

        if class_list is None and self.data_has_class:
            class_list = self.original_data[self.data_class_index]

        xarray = self.no_jittering_scaled_data[attr_indices[0]]
        yarray = self.no_jittering_scaled_data[attr_indices[1]]
        zarray = self.no_jittering_scaled_data[attr_indices[2]]
        if jitter_size > 0.0:
            xarray += (np.random.random(len(xarray))-0.5)*jitter_size
            yarray += (np.random.random(len(yarray))-0.5)*jitter_size
            zarray += (np.random.random(len(zarray))-0.5)*jitter_size
        if class_list is not None:
            data = np.compress(valid_data, np.array((xarray, yarray, zarray, class_list)), axis = 1)
        else:
            data = np.compress(valid_data, np.array((xarray, yarray, zarray)), axis = 1)
        data = np.transpose(data)
        return data
Example #15
 def _render(self, gc, pts):
     with gc:
         gc.clip_to_rect(self.x, self.y, self.width, self.height)
         if not self.index:
             return
         name = self.selection_metadata_name
         md = self.index.metadata
         if name in md and md[name] is not None and len(md[name]) > 0:
             # FIXME: when will we ever encounter multiple masks in the list?
             sel_mask = md[name][0]
             sel_pts = np.compress(sel_mask, pts, axis=0)
             unsel_pts = np.compress(~sel_mask, pts, axis=0)
             color = list(self.color_)
             color[3] *= self.unselected_alpha
             outline_color = list(self.outline_color_)
             outline_color[3] *= self.unselected_alpha
             if unsel_pts.size > 0:
                 self.render_markers_func(gc, unsel_pts, self.marker, self.marker_size,
                         tuple(color), self.unselected_line_width, tuple(outline_color),
                         self.custom_symbol)
             if sel_pts.size > 0:
                 self.render_markers_func(gc, sel_pts, self.marker, self.marker_size,
                         self.selected_color_, self.line_width, self.outline_color_,
                         self.custom_symbol)
         else:
             self.render_markers_func(gc, pts, self.marker, self.marker_size,
                     self.color_, self.line_width, self.outline_color_,
                     self.custom_symbol)
Example #16
 def lcylimits(self):
     """Determine the y-limts depending on what plots are selected """
     mask = (self.dtime > self.lcx1)*(self.dtime<self.lcx2)*(self.goodframes>0)
     if self.ratiovar.get():
         rarr=np.compress(mask,self.ratio)
         y1=rarr.min()
         y2=rarr.max()
         ylabel='Star1/Star2'
     else:
         if self.star2var.get() and self.star1var.get():
             cfarr=np.compress(mask,self.cflux).max()
             tfarr=np.compress(mask,self.tflux).max()
             y1=0
             y2=cfarr < tfarr and tfarr or cfarr
             ylabel='Star Flux'
         elif self.star2var.get():
             cfarr=np.compress(mask,self.cflux)
             y1=0
             y2=cfarr.max()
             ylabel='Star2 Flux'
         else:
             tfarr=np.compress(mask,self.tflux)
             y1=0
             y2=tfarr.max()
             ylabel='Star1 Flux'
     return y1, y2, ylabel
Example #17
def calculate_realexptime(id_arr, utc_arr, dsec_arr, diff_arr, req_texp, utc_list):
    """Calculates the real exposure time.
    This makes the following assumptions:
    #. That the measurement after the turn of the second is a fiducial
    #. That there is an integer number of frames between each fiducial exposure
    #. We then set up a metric which is Y=np.sum(i-int(i)) where i=dt/t_exp
    #. Then the minimum of Y is found between the requested exposure time and the median time difference
    #. And the best exposure time is the time at that minimum

    returns median exposure time and real exposure time
    """
    t_exp=0

    # calculate the median time
    try:
        t_wrong=np.median(diff_arr)
    except:
        raise SaltError('Unable to calculate median time difference')

    # Compress the arrays to find those closest to the second mark
    mask=(dsec_arr<t_wrong)*(diff_arr>0)
    t=np.compress(mask,utc_arr)
    s=np.compress(mask,dsec_arr)
    id=np.compress(mask,id_arr)

    # Now set up the components in the equation
    try:
        t_start=t[0]
        dt=t[1:]-t[0]
    except Exception, e:
        msg='Unable to set up necessary arrays because %s' % e
        raise SaltError(msg)
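A sketch of the metric described in the docstring (an assumption about how the search might be carried out; dt is the array built above, req_texp the requested exposure time, and t_wrong the median difference):

import numpy as np

def texp_metric(dt, t_exp):
    i = dt / t_exp
    return np.sum(i - np.floor(i))   # Y = sum of the fractional parts of dt/t_exp

# candidates = np.linspace(req_texp, t_wrong, 1000)
# t_exp = candidates[np.argmin([texp_metric(dt, t) for t in candidates])]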
Example #18
File: main.py Project: mrow4a/UNI
 def myzpk2tf(self, z, p, k):
         z = np.atleast_1d(z)
         k = np.atleast_1d(k)
         if len(z.shape) > 1:
                 temp = np.poly(z[0])
                 b = np.zeros((z.shape[0], z.shape[1] + 1), temp.dtype.char)
                 if len(k) == 1:
                         k = [k[0]] * z.shape[0]
                 for i in range(z.shape[0]):
                          b[i] = k[i] * np.poly(z[i])
         else:
                 b = k * np.poly(z)
         a = np.atleast_1d(np.poly(p))
         # Use real output if possible. Copied from numpy.poly, since
         # we can't depend on a specific version of numpy.
         if issubclass(b.dtype.type, np.complexfloating):
                 # if complex roots are all complex conjugates, the roots are real.
                 roots = np.asarray(z, complex)
                 pos_roots = np.compress(roots.imag > 0, roots)
                 neg_roots = np.conjugate(np.compress(roots.imag < 0, roots))
                 if len(pos_roots) == len(neg_roots):
                         if np.all(np.sort_complex(neg_roots) == np.sort_complex(pos_roots)):
                                 b = b.real.copy()
         if issubclass(a.dtype.type, np.complexfloating):
                 # if complex roots are all complex conjugates, the roots are real.
                 roots = np.asarray(p, complex)
                 pos_roots = np.compress(roots.imag > 0, roots)
                 neg_roots = np.conjugate(np.compress(roots.imag < 0, roots))
                 if len(pos_roots) == len(neg_roots):
                         if np.all(np.sort_complex(neg_roots) == np.sort_complex(pos_roots)):
                                 a = a.real.copy()
         return b, a
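The real-coefficient check above relies on the complex roots pairing up as conjugates; a standalone sketch of the idiom:

import numpy as np

roots = np.array([1 + 2j, 1 - 2j, 3 + 0j])
pos_roots = np.compress(roots.imag > 0, roots)
neg_roots = np.conjugate(np.compress(roots.imag < 0, roots))
is_real = (len(pos_roots) == len(neg_roots) and
           np.all(np.sort_complex(neg_roots) == np.sort_complex(pos_roots)))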
Example #19
def contributions(Ilength, Olength, scale, kernel,kernel_width):
    # Antialiasing for downsizing
    if scale < 1:
        h = lambda x: kernel(x,scale)
        kernel_width = kernel_width/scale
    else:
        h = kernel

    # output space coordinate
    x            = np.arange(Olength, dtype = float)
    x.shape     += (1,)
    # input space coord so that 0.5 in Out ~ 0.5 in In, and 0.5+scale in Out ~
    # 0.5 + 1 in In
    u            = x/scale + 0.5*(-1+1.0/scale)
    left         = np.floor(u-kernel_width/2)
    P            = math.ceil(kernel_width) + 2
    indices      = left + np.arange(P)
    weights      = h(u - indices)
    norm         = np.sum(weights,axis=1)
    norm.shape  += (1,)
    weights      = weights/norm
    indices      = np.minimum(np.maximum(0,indices),Ilength-1)
    indices      = np.array(indices,dtype                      = int)


    kill    = np.ma.any(weights,0)
    weights = np.compress(kill,weights,1)
    indices = np.compress(kill,indices,1)
    return (weights,indices)
Example #20
def test8():
    global L0, N

    L = deepcopy(L0)
    rho = zeros(N, 'double')

    rho[random.sample(xrange(N), N/2)] = 1

    print rho

    LI = linalg.inv(L)
    #print L
    #print LI
    #I = numpy.dot(L,LI)
    #I[abs(I)<0.001] = 0
    #print I

    t = numpy.greater(rho, 0)
    X = numpy.zeros((N,N))
    for i in xrange(N):
        X[0][i] = i
    print X
    LIC = numpy.compress(t, LI, 1)
    print LIC
    LIC = numpy.compress(t, LIC, 0)
    print LIC
    LICI = linalg.inv(LIC)
    print LICI
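Compressing with the same mask along both axes extracts the submatrix restricted to the selected rows and columns, which is what the two compress calls above do. A small sketch:

import numpy

M = numpy.arange(16.0).reshape(4, 4)
t = numpy.array([True, False, True, False])
sub = numpy.compress(t, numpy.compress(t, M, 1), 0)   # keeps rows/columns 0 and 2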
Example #21
    def test_np_ufuncs(self):
        z = self.create_array(shape=(100, 100), chunks=(10, 10))
        a = np.arange(10000).reshape(100, 100)
        z[:] = a

        eq(np.sum(a), np.sum(z))
        assert_array_equal(np.sum(a, axis=0), np.sum(z, axis=0))
        eq(np.mean(a), np.mean(z))
        assert_array_equal(np.mean(a, axis=1), np.mean(z, axis=1))
        condition = np.random.randint(0, 2, size=100, dtype=bool)
        assert_array_equal(np.compress(condition, a, axis=0),
                           np.compress(condition, z, axis=0))
        indices = np.random.choice(100, size=50, replace=True)
        assert_array_equal(np.take(a, indices, axis=1),
                           np.take(z, indices, axis=1))

        # use zarr array as indices or condition
        zc = self.create_array(shape=condition.shape, dtype=condition.dtype,
                               chunks=10, filters=None)
        zc[:] = condition
        assert_array_equal(np.compress(condition, a, axis=0),
                           np.compress(zc, a, axis=0))
        zi = self.create_array(shape=indices.shape, dtype=indices.dtype,
                               chunks=10, filters=None)
        zi[:] = indices
        # this triggers __array__() call with dtype argument
        assert_array_equal(np.take(a, indices, axis=1),
                           np.take(a, zi, axis=1))
Example #22
def unpack_data(path, delimiter, filtr=False, split_column=-1):
    """Measurements and errors are assumed to be alternating. The last
    pair of columns corresponds to the dependent variable
    while the preceding are independent.

    If filtr is True, values larger than the error are removed.

    If split_column is given, the data is split into lumps that share a
    value in that column, e.g. if split_column=(n-1) [n.b. we count from 0] and
    the nth column contains trial number, chemical type etc. this value will
    be used to categorise the rest of the data and the other procedures
    will run sequentially on each category, as if they were in different files."""

    raw = np.loadtxt(path, delimiter=delimiter, skiprows=1)
    data_name = os.path.splitext(os.path.basename(path))[0]

    if split_column != -1:
        raws = split_file(raw, split_column, data_name)
    else:
        # Needed to generalise following iterative step.
        raws = [(data_name, raw)]
    for (name, raw) in raws:
        meas = raw[:, ::2].transpose()
        err = raw[:, 1::2].transpose()
        if filtr:
            test = (abs(meas) >= err).prod(axis=0)
            meas = np.compress(test, meas, axis=1)
            err = np.compress(test, err, axis=1)

        if meas.shape[0] == 2:
            A = (meas[:-1].ravel(), err[:-1].ravel())
            yield name, (A, (meas[-1], err[-1]))
        else:
            yield name, ((meas[:-1], err[:-1]), (meas[-1], err[-1]))
Example #23
    def jiu(self):
        '''
        Define Joint Information Uncertainty coef., based on entropy., for discrete values
        Coefficient change between [0, 1]
        0 - no connection
        1 - full connection
        @param X    First raster's array
        @param Y    Second raster's array
        '''
        #T, sum_r, sum_s, total, r, s = compute_table(X, Y)
        table = self.getCrosstable()
        T = table.getProbtable()             #Pij = Tij / total
        sum_rows = table.getProbRows()       #Pi. = Ti. / total  i=[0,(r-1)]
        sum_cols = table.getProbCols()       #P.j = T.j / total  j=[0,(s-1)]

        #to calculate the entropy we take the logarithm,
        #logarithm of zero does not exist, so we must mask zero values
        sum_rows = np.compress(sum_rows != 0, sum_rows)
        sum_cols = np.compress(sum_cols != 0, sum_cols)
        #Compute the entropy coeff. of two raster
        H_x = -np.sum(sum_rows * np.log(sum_rows))
        H_y = -np.sum(sum_cols * np.log(sum_cols))
        #Compute the joint entropy coeff.
        T = np.ma.array(T, mask=(T == 0))
        T = np.ma.compressed(T)
        H_xy = -np.sum(T * np.log(T))
        # Compute the Joint Information Uncertainty
        U = 2.0 * ((H_x + H_y - H_xy)/(H_x + H_y))

        return U
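Since log(0) is undefined, zero probabilities are dropped before the logarithm is taken; a minimal sketch of that masking step:

import numpy as np

p = np.array([0.5, 0.0, 0.3, 0.2])
p = np.compress(p != 0, p)    # drop zero entries before taking logs
H = -np.sum(p * np.log(p))    # entropy over the remaining probabilities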
Example #24
 def estimateState(self):
     """ Updates the estimate of the state """
     best = numpy.argmax(self.Weights)
     beststate = self.States[best,:]
     
     #print "Best State:", beststate
     
     cond = (numpy.sum(numpy.fabs(self.States - beststate), axis=1) < 1)
     beststates = numpy.compress(cond, self.States, axis=0)
     bestweights = numpy.compress(cond, self.Weights)
     
     #print "States", self.States
     #print "States within window:", cond
     #print "States close to best", len(beststates), beststates
     #print "Weights close to best", bestweights
     
     #print "Product:", (bestweights*beststates.T).T
     bestweights /= numpy.sum(bestweights)
     self.State = numpy.sum((bestweights*beststates.T).T, axis=0)
     #print "Estimate:", self.State
     
     #print numpy.fabs(numpy.arctan2(self.State[Localisation.YDOT], self.State[Localisation.XDOT]) - self.State[Localisation.THETA]) -  self.__controlToVelocityVector()
     
     if numpy.isnan(self.State[0]):
         print "FAIL"
     self.__updateAttributesFromState()
Example #25
def roiEnergyAnalysis(data):
    '''Troubleshooting function, compares the observed sum energy in an ROI to the genEnergy''' 
    genEnergies = [] 
    sumEnergies = []
    pbar = progressbar("Processing event &count&:", len(data)+1)
    pbar.start()
    count = 0
    for event in data:
        genEnergy = event[2]['getpt'] * np.cosh(event[2]['geneta'])         
        for i in range(len(genEnergy)):
            clustersIndices = np.compress(event[1]['ROI'] == i, event[1]['clusterID'], axis=0)      #|Only take clusters corresponding to right ROI
            clusterEnergies = []
            for clusterID in clustersIndices:                                                       #|Only take hits corresponding to correct cluster
                hits = np.compress(event[0]['clusterID'] == clusterID, event[0], axis=0) 
                energies = hits['en'] 
                for energy in energies: 
                    clusterEnergies.append(energy)                                                  #|Add the energy to the cluster energies
            ROIEnergy = np.sum(clusterEnergies)
            # Append to original lists
            genEnergies.append(genEnergy[i])
            sumEnergies.append(ROIEnergy)
        pbar.update(count)
        count += 1
    pbar.finish()
    # np.save("sums.npy", sumEnergies)
    # np.save("gens.npy", genEnergies)
    # Plot it
    Plotter.sumEnergyVsGenEnergy(sumEnergies, genEnergies) 
Example #26
def gammaGunFilter(data, quiet=True):
    '''Filters the gamma gun data set, removing spurious events.'''
    if not quiet: print "Filtering"   
    data     = np.compress([len(event[2]) >= 2 for event in data], data, axis=0)                    #|Get at least two ROI's    
    data     = np.compress([np.max(event[2]['eta'])*np.min(event[2]['eta']) < 0 for event in data], 
                           data, axis=0)                                                            #|Require an eta separation of opposite endcaps to prevent high errors
    return data
Example #27
 def make_lcdata(self):
     #cut the data
     mask = (self.goodframes>0)
     self.tarr=np.compress(mask,self.dtime)
     self.rarr=np.compress(mask,self.ratio)
     self.tfarr=np.compress(mask,self.tflux)
     self.cfarr=np.compress(mask,self.cflux)
Example #28
    def get_posterior_sample(self, n):
        """
        Return a sample of the posterior distribution.
        Uses SIR algorithm.

        :Parameters:
            - `n`: Sample size.
        """
        if self.posterior.any():# Use last posterior as prior
            k = stats.kde.gaussian_kde(self.posterior)
            s = k.resample(n)
        else:
            s = self.get_prior_sample(n)
        if self.data is not None:
            m = self.rang[0]
            M = self.rang[1]
            step = self.res
            supp = arange(m, M, step)#support
            s = compress(less(s.ravel(), M) & greater(s.ravel(), m), s)#removing out-of-range samples
            d = stats.uniform.rvs(loc=0, scale=1, size=len(s))#Uniform 0-1 samples
            w = self.pdf(supp) * self.likelihood
            w = w / sum(w) #normalizing weights
            sx = searchsorted(supp, s)
            w = w[sx-1]#search sorted returns 1-based binlist
            post = compress(d < w, s)
            self.posterior = post
            return post
        else:
            return array([])
Example #29
def calcZ01andZ10(Y, MPS):
    try:
        U, S, V = spla.svd(Y, full_matrices=True)
    except spla.LinAlgError as err:
        if 'empty' in err.message:
            row, col = Y.shape
            Z01 = np.array([], dtype=Y.dtype).reshape(row, 0)
            Z10 = np.array([], dtype=Y.dtype).reshape(0, col)
            print "Empty", Z01.shape, Z10.shape
        else:
            print >> sys.stderr, "calcZ01andZ10: Error", I, err
            raise
    else:
        print "S", S, "\nU", U, "\nV", V
        __, chi, __ = MPS.shape
        mask = (S > expS) #np.array([True] * S.shape[0])
        mask[xiTilde - chi:] = False
        U = np.compress(mask, U, 1)
        S = np.compress(mask, S, 0)
        V = np.compress(mask, V, 0)

        Ssq = np.diag(np.sqrt(S))
        Z01 = np.dot(U, Ssq)
        Z10 = np.dot(Ssq, V)
        print "Fill ", U.shape, V.shape, "mask", mask

    eps = np.linalg.norm(np.dot(Z01, Z10))
    print "eps", I, eps
    print "Z01", Z01.shape, "\n", Z01, "\nZ10", Z10.shape, "\n", Z10

    return Z01, Z10
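A standalone sketch of truncating an SVD with a boolean mask over the singular values, as in the example above (the threshold here is arbitrary):

import numpy as np

A = np.random.rand(5, 4)
U, S, V = np.linalg.svd(A, full_matrices=True)
mask = S > 0.1
U = np.compress(mask, U, 1)   # keep the matching columns of U
S = np.compress(mask, S, 0)
V = np.compress(mask, V, 0)   # keep the matching rows of V
A_approx = np.dot(U * S, V)   # low-rank reconstruction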
Example #30
def ref(m):
    if m.shape[0]==1:
        for i in range(1,m.shape[1]):
            m[0,i]=m.item(0,i)/m.item(0,0)
        m[0,0]=1
        return m
    m=np.copy(trim(m))
    if m.item(0,0)==0:
        for i in range(1,m.shape[0]):
            if m.item(i,0)!=0:
                m=np.copy(swap(m,0,i))
                break
    for i in range(1,m.shape[1]):
        m[0,i]=m.item(0,i)/m.item(0,0)
    m[0,0]=1
    for j in range(1,m.shape[0]):
        for k in range(1,m.shape[1]):
            m[j,k]=m.item(j,k)-(m.item(j,0))*(m.item(0,k))
        m[j,0]=0
    a=[False]
    b=[False]
    for i in range(1,m.shape[0]):
        a=np.append(a,True)
    for i in range(1,m.shape[1]):
        b=np.append(b,True)
    n=np.compress(a,np.compress(b,m,axis=1),axis=0)
    n=np.copy(rref2(n))
    for i in range(1,m.shape[0]):
        for j in range(1,m.shape[1]):
            m[i,j]=n.item(i-1,j-1)
    return m
Example #31
    def refine(self, gr, tol=0.05):
        tx, ty, tz = gr.translation
        wvln = float(self.pars.get("wavelength"))
        if hasattr(gr, "pks") and gr.pks is not None:
            #print "Got pks"
            pks = gr.pks
            XS = self.XS[:, pks]
            XB = self.XB[:, pks]
        else:
            #print "New pks"
            XS = self.XS
            XB = self.XB
            pks = np.arange(len(XS[0]))
        ret = d_XSXB_to_gv(XS, XB, tx, ty, tz, wvln)
        gv = ret[0]
        dg0_dt = ret[1], ret[4], ret[7]
        dg1_dt = ret[2], ret[5], ret[8]
        dg2_dt = ret[3], ret[6], ret[9]
        hklr = np.dot(gr.ubi, gv)
        hkli = np.round(hklr)
        hkle = hklr - hkli
        scor = np.sqrt((hkle * hkle).sum(axis=0))
        #print "before",len(pks),pks
        if tol is not None:
            use = np.compress(scor < tol, np.arange(len(gv[0])))
            #print "after",len(pks),pks
            gr.pks = pks[use]
        else:
            use = np.arange(len(gr.pks), dtype=int)
        #print "score = ", scor[pks].sum()/len(pks), len(pks), tol
        # peaks to use are those with scor OK
        #
        #  UB.h = gvcalc
        #  dg/dUB = h
        #  dg/dT  = found above
        gcalc = np.dot(gr.UB, hkli)
        diff = np.take(gv - gcalc, use, axis=1)
        # print diff.shape, pks.shape
        # gv[0],[1],[2] = 3
        # tx, ty, ty    = 3
        # UB00 ... UB22 = 9
        # want derivative of diff w.r.t each variable
        grads = np.zeros((12, 3, len(use)))
        for i in range(3):
            for j in range(3):  # tx, ty, tz
                #print 1+j*3+i,
                #print ret[1+j*3+i].shape
                grads[j, i] = ret[1 + j * 3 + i][use]
        #     print grads[j,i]
            for j in range(3):
                # gx = 0h + 1k + 2l
                # gy = 3h + 4k + 5l
                # gz = 6h + 7k + 8l
                # i is gx, gy, gz
                # j is ub elements
                grads[3 + j + i * 3, i] = hkli[j][use]
        #     print grads[3+j+i*3,i]
        # grains = 12, xyz, pks
        mtx = np.zeros((12, 12))
        for i in range(12):
            for j in range(i, 12):
                for k in range(3):
                    mtx[i, j] += (grads[i, k, :] * grads[j, k, :]).sum()
                if j != i:
                    mtx[j, i] = mtx[i, j]

        #    mtx = np.dot( grads, grads.T) # vector, outer, ?
        rhs = np.zeros(12)
        for i in range(12):
            for k in range(3):
                rhs[i] += (grads[i, k] * diff[k]).sum()
        #print mtx
        # print rhs
        imt = np.linalg.inv(mtx)
        shifts = np.dot(imt, rhs)
        tx = tx - shifts[0]
        ty = ty - shifts[1]
        tz = tz - shifts[2]
        gr.translation = [tx, ty, tz]
        ub = gr.UB.ravel()
        np.add(ub, shifts[3:], ub)
        gr.set_ubi(np.linalg.inv(np.reshape(ub, (3, 3))))
        gr.npks = len(use)
        #1/0
        return gr
Example #32
if __name__ == "__main__":
    import sys
    from ImageD11.indexing import ubitocellpars
    o = fittrans(sys.argv[1], sys.argv[2])
    gl = grain.read_grain_file(sys.argv[3])
    gref = gl[0]
    import time
    start = time.time()
    gfl = []
    ng = 0
    # Take old peak assignments:
    if 1:
        print("Using existing peak assignments")
        inds = np.arange(o.colfile.nrows, dtype=int)
        for i, gref in enumerate(gl):
            gref.pks = np.compress(o.colfile.labels == i, inds)
        tols = [
            None,
        ] * 3
    else:
        tols = [0.05, 0.01, 0.0075]
    for gref in gl:
        for ii, tol in enumerate(tols):
            #print gref.translation
            gref = o.refine(gref, tol=tol)
            #print ii, gref.translation, gref.npks,
            #print i,gref.npks
#        gref.pks = None
# re-assign after convergence
#        gref = o.refine( gref, tol=0.0075)
        gfl.append(gref)
Example #33
                dataBlockingResultsFile +
                ' already exists but is empty...It will be replaced with a new file.\n'
            )
            fe = 0
    else:
        print(dataBlockingResultsFile +
              ' does not exists.  It will be created.\n')
        fe = 0

    if fe == 0:
        kk = kk + 1
        print("Analyzing file: " + tf + "\n")

        thermo_data_raw = np.genfromtxt(tf, names=True)
        condition = np.logical_and(eqData[:, 0] == P, eqData[:, 1] == T)
        eqSteps = np.compress(condition, eqData, axis=0)[0, 2].astype(int)
        uncorrelatedBlockSize = np.compress(condition, eqData,
                                            axis=0)[0, 3].astype(int)
        LEstartSteps = np.compress(condition, eqData, axis=0)[0, 4].astype(int)
        LEstartSteps = max(LEstartSteps, eqSteps)

        printInterval = int(thermo_data_raw[1]['Step'] -
                            thermo_data_raw[0]['Step'])
        le_data_raw = thermo_data_raw[int(LEstartSteps / printInterval) +
                                      1:][['Step', 'lE']]
        thermo_data_raw = thermo_data_raw[int(eqSteps / printInterval) + 1:]
        #for rawSampleNumber in range(np.size(thermo_data_raw)):
        #print("rawSampleNumber: " + str(rawSampleNumber) + " rawSample: " + str(thermo_data_raw[rawSampleNumber]))

        #energy_tot_mean = np.mean(thermo_data_raw[:]['Energy'])
        #print("Total mean energy: " + str(energy_tot_mean));
Example #34
MBR_pheno_input = MBR_pheno_input[skitzo_class != 0]
MBR_pheno_h = read_header(
    path + "/data_encoded/phenotypes_age/mbr_cat_headers_age.txt")
MBR_pheno, MBR_pheno_input, MBR_pheno_h = remove_not_obs_cat(
    MBR_pheno, MBR_pheno_input, MBR_pheno_h, 0.01)

sibling_pheno, sibling_pheno_input = read_cat(
    path + "/data_encoded/input/sibling_cat.npy")
sibling_pheno = sibling_pheno[skitzo_class != 0]
sibling_pheno_input = sibling_pheno_input[skitzo_class != 0]
sibling_pheno_h = read_header(
    path + "/data_encoded/phenotypes_age/sibling_cat_headers.txt")
sibling_pheno, sibling_pheno_input, sibling_pheno_h = remove_not_obs_cat(
    sibling_pheno, sibling_pheno_input, sibling_pheno_h, 0.01)
sibling_pheno = np.compress((sibling_pheno != 0).sum(axis=(0, 1)),
                            sibling_pheno,
                            axis=2)

# combine MBR and sibling
#MBR_sibling = np.concatenate((MBR_pheno, sibling_pheno), axis=1)
#MBR_sibling_h = np.concatenate((MBR_pheno_h, sibling_pheno_h))

## load in genotype
geno, geno_input = read_cat(path + "/data_encoded/input/genotypes_all.npy")
geno = geno[skitzo_class != 0]
geno_input = geno_input[skitzo_class != 0]
geno_h = read_header(path + "/data_encoded/genomics/genotypes_headers_all.txt")
geno, geno_input, geno_h = remove_not_obs_ordinal(geno, geno_input, geno_h,
                                                  0.01)

hla_pheno, hla_pheno_input = read_cat(path +
Example #35
if (export_mesh):
    m.export_to_vtk('mesh.vtk')
    print('\nYou can view the mesh for instance with')
    print('mayavi2 -d mesh.vtk -f ExtractEdges -m Surface \n')

#  Integration method used
# mim = gf.MeshIm(m, gf.Integ('IM_PYRAMID_COMPOSITE(IM_TETRAHEDRON(6))'))
mim = gf.MeshIm(m, gf.Integ('IM_PYRAMID(IM_GAUSS_PARALLELEPIPED(3,3))'))
# mim = gf.MeshIm(m, gf.Integ('IM_TETRAHEDRON(5)'))

# Boundary selection
flst = m.outer_faces()
fnor = m.normal_of_faces(flst)
tleft = abs(fnor[1, :] + 1) < 1e-14
ttop = abs(fnor[0, :] - 1) < 1e-14
fleft = np.compress(tleft, flst, axis=1)
ftop = np.compress(ttop, flst, axis=1)
fneum = np.compress(~(ttop | tleft), flst, axis=1)

# Mark it as boundary
DIRICHLET_BOUNDARY_NUM1 = 1
DIRICHLET_BOUNDARY_NUM2 = 2
NEUMANN_BOUNDARY_NUM = 3
m.set_region(DIRICHLET_BOUNDARY_NUM1, fleft)
m.set_region(DIRICHLET_BOUNDARY_NUM2, ftop)
m.set_region(NEUMANN_BOUNDARY_NUM, fneum)

# Interpolate the exact solution (Assuming mfu is a Lagrange fem)
Ue = mfu.eval('y*(y-1)*x*(x-1)+x*x*x*x*x')

# Interpolate the source term
Example #36
def kaplan_meier_estimator(event,
                           time_exit,
                           time_enter=None,
                           time_min=None,
                           reverse=False):
    """Kaplan-Meier estimator of survival function.

    See [1]_ for further description.

    Parameters
    ----------
    event : array-like, shape = (n_samples,)
        Contains binary event indicators.

    time_exit : array-like, shape = (n_samples,)
        Contains event/censoring times.

    time_enter : array-like, shape = (n_samples,), optional
        Contains time when each individual entered the study for
        left truncated survival data.

    time_min : float, optional
        Compute estimator conditional on survival at least up to
        the specified time.

    reverse : bool, optional, default: False
        Whether to estimate the censoring distribution.
        When there are ties between times at which events are observed,
        then events come first and are subtracted from the denominator.
        Only available for right-censored data, i.e. `time_enter` must
        be None.

    Returns
    -------
    time : array, shape = (n_times,)
        Unique times.

    prob_survival : array, shape = (n_times,)
        Survival probability at each unique time point.
        If `time_enter` is provided, estimates are conditional probabilities.

    Examples
    --------
    Creating a Kaplan-Meier curve:

    >>> x, y = kaplan_meier_estimator(event, time)
    >>> plt.step(x, y, where="post")
    >>> plt.ylim(0, 1)
    >>> plt.show()

    References
    ----------
    .. [1] Kaplan, E. L. and Meier, P., "Nonparametric estimation from incomplete observations",
           Journal of The American Statistical Association, vol. 53, pp. 457-481, 1958.
    """
    event, time_enter, time_exit = check_y_survival(event,
                                                    time_enter,
                                                    time_exit,
                                                    allow_all_censored=True)
    check_consistent_length(event, time_enter, time_exit)

    if time_enter is None:
        uniq_times, n_events, n_at_risk, n_censored = _compute_counts(
            event, time_exit)

        if reverse:
            n_at_risk -= n_events
            n_events = n_censored
    else:
        if reverse:
            raise ValueError(
                "The censoring distribution cannot be estimated from left truncated data"
            )

        uniq_times, n_events, n_at_risk = _compute_counts_truncated(
            event, time_enter, time_exit)

    # account for 0/0 = nan
    ratio = numpy.divide(n_events,
                         n_at_risk,
                         out=numpy.zeros(uniq_times.shape[0], dtype=float),
                         where=n_events != 0)
    values = 1.0 - ratio

    if time_min is not None:
        mask = uniq_times >= time_min
        uniq_times = numpy.compress(mask, uniq_times)
        values = numpy.compress(mask, values)

    y = numpy.cumprod(values)
    return uniq_times, y
Example #37
def qtapr(ballots,
          weights,
          cnames,
          numseats,
          verbose=0,
          use_mj=True,
          use_two_q=False):
    """Run quota threshold approval rating method (MJ-style or
    Bucklin-style) to elect <numseats> winners in a Droop proportional
    multiwinner election.
    """

    numballots, numcands = np.shape(ballots)
    ncands = numcands

    numvotes = weights.sum()
    numvotes_orig = float(numvotes)  # Force a copy

    quota = droopquota(numvotes, numseats)

    maxscore = int(ballots.max())

    cands = np.arange(numcands)

    winners = []

    maxscorep1 = maxscore + 1

    factor_array = []
    qthresh_array = []

    for seat in range(numseats):

        if verbose > 0:
            print("- " * 30, "\nStarting count for seat", seat + 1)
            print("Number of votes:", myfmt(numvotes))

        # ----------------------------------------------------------------------
        # Tabulation:
        # ----------------------------------------------------------------------
        # Score and Cumulative Score arrays (summing downward from maxscore)
        S, T = tabulate_score_from_ratings(ballots, weights, maxscore, ncands)
        (winner, winsum, factor, ranking, ratings) = aqt(maxscore,
                                                         quota,
                                                         ncands,
                                                         cands,
                                                         S,
                                                         T,
                                                         use_mj=use_mj,
                                                         use_two_q=use_two_q)

        winner_quota_threshold = ratings[winner][0]

        # Seat the winner, then eliminate from candidates for next count
        if verbose:
            print("\n-----------\n*** Seat {}: {}\n-----------\n".format(
                seat + 1, cnames[winner]))
            if verbose > 1:
                print("QTAR ranking for this seat:")
                if use_mj:
                    if use_two_q:
                        for c in ranking:
                            r, twoqavg, *rest = ratings[c]
                            print("\t{}:({},{},{})".format(
                                cnames[c], r, myfmt(twoqavg), ",".join([
                                    "({},{})".format(s, myfmt(t))
                                    for s, t in rest
                                ])))
                    else:
                        for c in ranking:
                            r, *rest = ratings[c]
                            print("\t{}:({},{})".format(
                                cnames[c], r, ",".join([
                                    "({},{})".format(s, myfmt(t))
                                    for s, t in rest
                                ])))
                else:
                    if use_two_q:
                        for c in ranking:
                            r, twoqavg, t = ratings[c]
                            print("\t{}:({},{},{})".format(
                                cnames[c], r, myfmt(twoqavg), myfmt(t)))
                    else:
                        for c in ranking:
                            r, t = ratings[c]
                            print("\t{}:({},{})".format(
                                cnames[c], r, myfmt(t)))
                print("")

        if (seat < numseats):
            winners += [winner]
            cands = np.compress(cands != winner, cands)

        weights = np.multiply(
            weights,
            np.where(ballots[..., winner] < winner_quota_threshold, 1, factor))
        numvotes = weights.sum()
        scorerange = np.arange(maxscorep1)

        factor_array.append(factor)
        qthresh_array.append(winner_quota_threshold)

        # Reweight ballots:
        winscores = ballots[..., winner]
        if verbose:
            print("Winner's votes per rating: ", (", ".join([
                "{}:{}".format(j, myfmt(f))
                for j, f in zip(scorerange[-1:0:-1], S[-1:0:-1, winner])
            ])))
            print("After reweighting ballots:")
            print("\tQuota:  {}%".format(myfmt(quota / numvotes_orig * 100)))
            print(("\tWinner's quota approval threshold rating "
                   "before reweighting:  {}%").format(
                       myfmt((winsum / numvotes_orig) * 100)))
            print("\tReweighting factor:  ", factor)
            print(("\tPercentage of vote remaining "
                   "after reweighting:  {}%").format(
                       myfmt((numvotes / numvotes_orig) * 100)))

    if verbose > 1 and numseats > 1:
        print("- " * 30 + "\nReweighting factors for all seat winners:")
        for w, f, qt in zip(winners, factor_array, qthresh_array):
            print("\t{} : ({}, {})".format(cnames[w], myfmt(qt), myfmt(f)))

    if verbose > 3 and numseats > 1:
        print("- " * 30 + "\nRemaining ballots and weights:")
        print("{},{}".format("weight", ','.join(cnames)))
        for w, ballot in zip(weights, ballots):
            print("{},{}".format(myfmt(w), ','.join([str(b) for b in ballot])))

    return (winners)
Example #38
 def active_from_full(self, joints):
     return np.compress(self.active_links_mask, joints, axis=0)
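This one-liner keeps only the joint values whose links are active, for example:

import numpy as np

active_links_mask = np.array([True, False, True, True])
joints = np.array([0.1, 0.2, 0.3, 0.4])
active_joints = np.compress(active_links_mask, joints, axis=0)   # [0.1, 0.3, 0.4]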
Example #39
def iFAB_PCL_Sequence( Points4D, pltFlag = 0 ):

    N = Points4D.shape[0]; #N = size(Points4D,1);
    Seq = 0; #Seq = 1;

    SequenceIx = np.arange(0,N);
    SortedSequence = np.zeros(N);
    
    
    # Sequentialize Data from Index Marker

    SeqData = Points4D[:,3];
    SeqData1 = np.roll(SeqData,-1);
    SeqDiff = (SeqData - SeqData1); # Find Difference between adjacent values
    SeqDiff = np.delete(SeqDiff,[SeqDiff.size - 1]);

    # Identify huge jumps as folds

    FoldPos = (SeqDiff > 1024);
    Folds = sum(FoldPos);
    CarryOverFlag = 0;
    IxBuffer = np.array([0]);
    CarryOverBuffer = np.array([0,0,0,0]);
    if (Folds > 0):

        print "\t\t Total Folds Identified : ", Folds, "\n";
        FoldIx = SequenceIx[FoldPos];
        F_ix = np.arange(0,FoldIx.size)
        Fd = np.roll(FoldIx,-1);
        
        Overlapped = (abs(FoldIx - Fd) < 100);
        Overlapped[-1] = 0;

        for i in xrange(0,Folds):
            
            if np.logical_not((Overlapped[i])):

                BufferData = Points4D[Seq:(FoldIx[i] + 1),3];   # Get first set
                I = np.argsort(BufferData);                     # Sort picked fold
                SortedSequence[Seq:FoldIx[i] + 1] = I + Seq;    # Save sorted fold
                Seq = FoldIx[i] + 1;                            # Update fold index
                
                if CarryOverFlag: # if previous folds were redundant act accordingly
                    In_Ix = np.arange(0,IxBuffer.size - 1);
                    Points4D = np.insert(Points4D,In_Ix + Seq,CarryOverBuffer[1:,:],0); # re-insert removd values into next fold
                    CarryOverBuffer = np.array([[0],[0],[0],[0]]); # empty accumulated buffer
                    FoldIx = FoldIx + IxBuffer.size - 1; # update index values
                    IxBuffer = np.array([0]); #reset redundant fold count
                
                if (i==(Folds-1)):                                  # If last fold index, sort the rest

                    I = np.argsort(Points4D[Seq:,3]);
                    SortedSequence[Seq:] = Seq + I;
                
                CarryOverFlag = 0;
                CarryOverBuffer = np.array([0,0,0,0]);
            
            else:
                
                CarryOverFlag = 1; # Mark for adding values into next fold
                IxBuffer = np.hstack((IxBuffer,FoldIx[i]+1));   # Monitor fold overlaps 
  
                CarryOverBuffer = np.vstack((CarryOverBuffer,Points4D[FoldIx[i]+1,:]))  # Accumulate Values at overlapping folds               
                
                B = np.zeros([np.size(Points4D,0),1]); # remove the values at overlapping folds
                B = B+1;
                B[FoldIx[i]+1] = 0;
                B = B.flatten();
                Points4D = np.compress(B,Points4D,0);
                
                FoldIx = FoldIx - 1; # update fold index after popping a value

    else:

        print "\t\t No Folds Identified"
        I = np.argsort(Points4D[:,3]);
        SortedSequence = I;
       

    OutOfSequence = sum(SortedSequence != SequenceIx);
    print "\t\t Points out of Sequence: ", OutOfSequence, "\n";   
    Points3D = Points4D[np.int64(SortedSequence),0:3];
    
    return Points3D
Example #40
def imageSwathVar(granules,
                  variable,
                  scaleFactor,
                  title,
                  outFile,
                  filterMin=None,
                  filterMax=None,
                  scaleMin=None,
                  scaleMax=None,
                  imageWidth=None,
                  imageHeight=None,
                  plotType='map',
                  projection='cyl',
                  markerSize=10,
                  **options):
    if filterMin == 'auto': filterMin = None
    if filterMax == 'auto': filterMax = None
    #    files = [localize(url) for url in granules if url != 'None']
    files = granules
    imageFiles = []
    lonLatBounds = []

    for i, file in enumerate(files):
        print 'imageSwathVar: Reading %s: %s' % (file, variable)
        localFile = localize(file, retrieve=False)
        if i == 0:
            swath = hdfeos.swaths(file)[0]
#            geoFields = hdfeos.swath_geo_fields(file, swath)
        lat = hdfeos.swath_field_read(file, swath, 'Latitude')
        lon = hdfeos.swath_field_read(file, swath, 'Longitude')
        ###        time = hdfeos.swath_field_read(file, swath, 'Time')
        ###        pressure = hdfeos.swath_field_read(file, swath, '???')

        if N.minimum.reduce(lon.flat) < -360. or N.minimum.reduce(
                lat.flat) < -90.:
            useImageMap = False  # have missing values in lat/lon coord variables
        else:
            useImageMap = True

        dataFields = hdfeos.swath_data_fields(file, swath)
        if '[' not in variable:
            varName = variable
        else:
            varName, slice = variable.split('[')
        if varName not in dataFields:
            die('%s not a variable in %s' % (variable, file))
        if '[' not in variable:
            var = hdfeos.swath_field_read(file, swath,
                                          variable) * float(scaleFactor)
        else:
            vals = hdfeos.swath_field_read(file, swath, varName)
            var = eval('['.join(('vals', slice)))
            var = var * float(scaleFactor)

        print 'imageSwathVar: Variable range: %f -> %f' % (min(
            min(var)), max(max(var)))
        if plotType != 'map' or not useImageMap:
            lat = lat.flat
            lon = lon.flat
            var = var.flat

        if filterMin is not None or filterMax is not None:
            if filterMin is not None and filterMax is None:
                cond = N.greater(var, float(filterMin))
            elif filterMin is None and filterMax is not None:
                cond = N.less(var, float(filterMax))
            else:
                cond = N.logical_and(N.greater(var, float(filterMin)),
                                     N.less(var, float(filterMax)))
            if plotType == 'map' and useImageMap:
                lat = MA.masked_where(cond, lat, copy=0)
                lon = MA.masked_where(cond, lon, copy=0)
                var = MA.masked_where(cond, var, copy=0)
            else:
                lat = N.compress(cond, lat.flat)
                lon = N.compress(cond, lon.flat)
                var = N.compress(cond, var.flat)

        lonLatBound = (min(min(lon)), min(min(lat)), max(max(lon)),
                       max(max(lat)))
        lonLatBounds.append(lonLatBound)

        if plotType == 'map':
            imageFile = localFile + '_image.png'
            if useImageMap:
                upOrDown = 'upper'
                if lat[0, 0] < lat[-1, 0]: upOrDown = 'lower'
                if lon[0, 0] > lon[0, -1]: var = fliplr(var)
                image2(var,
                       scaleMin,
                       scaleMax,
                       imageFile,
                       upOrDown=upOrDown,
                       **options)


#                plainImage2(var, imageFile)
            else:
                marksOnMap(lon,
                           lat,
                           var,
                           scaleMin,
                           scaleMax,
                           imageWidth,
                           imageHeight,
                           imageFile,
                           projection,
                           autoBorders=True,
                           title=title + ' ' + file,
                           sizes=markerSize * markerSize,
                           **options)
        elif plotType == 'hist':
            imageFile = localFile + '_aot_hist.png'
            hist(var, 50, imageFile)
        else:
            die("plotSwathVar: plotType must be 'map' or 'hist'")

        imageFiles.append(imageFile)
    print "imageSwathVar results:", imageFiles
    return (imageFiles, lonLatBounds)
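
# A minimal standalone sketch of the filter logic above, assuming modern
# numpy/numpy.ma in place of the N/MA aliases (toy data, not the HDF-EOS
# reader used by imageSwathVar): compress keeps the passing values as a flat
# array for scatter/histogram plots, while masked_where hides the failing
# values but preserves the grid shape for image maps.
import numpy as np
import numpy.ma as ma

var = np.array([0.2, 1.5, 3.7, -0.1, 2.2])
filterMin, filterMax = 0.0, 3.0
cond = np.logical_and(np.greater(var, filterMin), np.less(var, filterMax))

flat_var = np.compress(cond, var)                        # [0.2 1.5 2.2]
masked_var = ma.masked_where(np.logical_not(cond), var)  # [0.2 1.5 -- -- 2.2]
print(flat_var, masked_var)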
Example #41
def errore(img, imgpsf, coordlist, size, truemag, fwhm0, leng0, _show,
           _interactive, _numiter, z11, z22, midpt, nax, nay, xbgord0, ybgord0,
           _recenter, apco0, dmax, dmin):
    import lsc
    import os, sys, re, string
    import numpy as np
    from numpy import array, mean, std, compress, average
    from astropy.io import fits  # assumed provider of fits.getdata used below
    from pyraf import iraf
    if not _numiter: _numiter = 3
    dartf = 100
    while dartf >= size - 1:
        if _interactive:
            artfac0 = raw_input(
                '>>> Dispersion of artificial star positions (in units of FWHM) [1] '
            )
            if not artfac0: artfac0 = 1
        else:
            artfac0 = 1
        try:
            artfac0 = float(artfac0)
            if float(artfac0) >= size - 1:
                print '!!! WARNING: ' + str(
                    artfac0) + ' too large (max ' + str(size) + ' - 1)'
                print 'try again....'
            else:
                dartf = artfac0
        except:
            print '#### WARNING: ' + str(artfac0) + ' should be a number !!!!'
            print 'try again....'
    lsc.util.delete("tmpar?")

    lsc.util.delete('artskyfit.fits')
    os.system('cp skyfit.fits artskyfit.fits')
    i = 0
    tmpart = []
    while i <= 8:
        lsc.util.delete(
            "reserr.fit?,artbg.fit?,artstar.fit?,artres.fit?,artfit.fit?")
        artrad = fwhm0 / 2.
        #artseed = artseed+1234
        artx = int(i / 3.) - 1
        if i <= 2: arty = artx + i
        if 3 <= i <= 5: arty = artx - 1 + i - 3
        if i >= 6: arty = artx - 2 + i - 6

        ff = open(img + ".sn.coo", 'r')
        ss = ff.readline()
        ff.close()
        xbb = float(string.split(ss)[0])
        ybb = float(string.split(ss)[1])

        xbb = xbb + artx * fwhm0 * artfac0
        ybb = ybb + arty * fwhm0 * artfac0

        lsc.util.delete(coordlist)
        ff = open(coordlist, 'w')
        ff.write(str(xbb) + '  ' + str(ybb) + '  ' + str(truemag[0]) + "  1")
        ff.close()

        xb1 = int(float(xbb) - fwhm0 * float(leng0) / 2)
        xb2 = int(float(xbb) + fwhm0 * float(leng0) / 2)
        yb1 = int(float(ybb) - fwhm0 * float(leng0) / 2)
        yb2 = int(float(ybb) + fwhm0 * float(leng0) / 2)
        sec = "1 " + str(xb1) + " 1 " + str(nay) + '\n'
        sec = sec + str(xb2) + ' ' + str(nax) + " 1 " + str(nay) + '\n'
        sec = sec + str(xb1) + ' ' + str(xb2) + " 1 " + str(yb1) + '\n'
        sec = sec + str(xb1) + ' ' + str(xb2) + ' ' + str(yb2) + ' ' + str(
            nay) + '\n'
        ff = open('sec', 'w')
        ff.write(sec)
        ff.close()

        lsc.util.delete("reserr.ar?")
        lsc.util.delete("artlist.ma?")
        lsc.util.delete("artsky.fit?")
        lsc.util.delete("artbg.fit?")
        lsc.util.delete("artbgs.fit?")
        lsc.util.delete("artsn.fit?")
        lsc.util.delete("artres.fit?")
        lsc.util.delete("artlist.al?")

        iraf.addstar("artskyfit",
                     coordlist,
                     imgpsf,
                     "reserr",
                     nstar=1,
                     veri='no',
                     simple='yes',
                     verb='no')
        # reserr = skyfit + artificial star ########
        inp = "artbg.fits[" + str(xb1) + ":" + str(xb2) + "," + str(
            yb1) + ":" + str(yb2) + "]"
        out = "artsky.fits[" + str(xb1) + ":" + str(xb2) + "," + str(
            yb1) + ":" + str(yb2) + "]"
        iraf.imsurfit("reserr",
                      "artbg",
                      xorder=xbgord0,
                      yorder=ybgord0,
                      regions="section",
                      section="sec")
        midpt = np.mean(fits.getdata('artbg.fits'))
        iraf.imcopy('reserr.fits', 'artsky.fits')
        iraf.imcopy(inp, 'artbgs.fits')
        iraf.imcopy("artbgs.fits", out)
        iraf.imarith("reserr",
                     "-",
                     "artsky",
                     "artsn",
                     calctype="r",
                     pixtype="r",
                     verb='no')
        iraf.imarith("artsn", "+", midpt, "artsn", verb='no')

        artap1, artap2, artap3, artmag1, artmag2, artmag3, dartmag1, dartmag2, dartmag3, artfitmag, arttruemag, artmagerr, artcentx, artcenty = \
            fitsn(img,imgpsf,coordlist,_recenter,fwhm0,'reserr','artsn','artres',_show,_interactive,dmax,dmin,z11,z22,midpt,size,apco0)

        for ii in range(0, _numiter):
            lsc.util.delete("reserr.ar?")
            lsc.util.delete("artlist.ma?")
            lsc.util.delete("artsky.fit?")
            lsc.util.delete("artbg.fit?")
            lsc.util.delete("artbgs.fit?")
            lsc.util.delete("artsn.fit?")
            lsc.util.delete("artres.fit?")
            lsc.util.delete("artlist.al?")

            iraf.imsurfit("skyfit",
                          "artbg",
                          xorder=xbgord0,
                          yorder=ybgord0,
                          regions="section",
                          section="sec")
            midpt = np.mean(fits.getdata('artbg.fits'))
            iraf.imcopy("reserr.fits", "artsky.fits")
            iraf.imcopy(inp, "artbgs.fits")
            iraf.imcopy("artbgs.fits", out)

            iraf.imarith("reserr",
                         "-",
                         "artsky",
                         "artsn",
                         calctype="r",
                         pixtype="r",
                         verb='no')
            iraf.imarith("artsn.fits", "+", midpt, "artsn.fits", verb='no')
            artap1, artap2, artap3, artmag1, artmag2, artmag3, dartmag1, dartmag2, dartmag3, artfitmag, arttruemag, artmagerr, artcentx, artcenty = \
                fitsn(img,imgpsf,coordlist,_recenter,fwhm0,'reserr','artsn','artres',_show,_interactive,dmax,dmin,z11,z22,midpt,size,0)
#######
        if i == 0: era = 'yes'
        else: era = 'no'
        artx = .5 + .25 * artx
        arty = .5 + .25 * arty
        if _show:
            _tmp1, _tmp2, goon = lsc.util.display_image('skyfit.fits',
                                                        1,
                                                        '',
                                                        '',
                                                        False,
                                                        _xcen=artx,
                                                        _ycen=arty,
                                                        _xsize=.25,
                                                        _ysize=.25,
                                                        _erase=era)
        try:
            tmpart.append(float(arttruemag[0]))
        except:
            pass
        i = i + 1

    for i in tmpart:
        print i

    print " ########## "
    try:
        media = mean(array(tmpart))
        arterr = std(array(tmpart))
        arterr2 = std(
            compress((average(tmpart) - std(tmpart) < array(tmpart)) &
                     (array(tmpart) < average(tmpart) + std(tmpart)),
                     array(tmpart)))
    except:
        media = 0
        arterr = 0
        arterr2 = 0
    print '### average = %6.6s \t arterr= %6.6s ' % (str(media), str(arterr))
    print '###  %6.6s \t (error at 1 sigma rejection) ' % (str(arterr2))
    lsc.util.delete(
        "reserr.fit?,artbg.fit?,artstar.fit?,artres.fit?,artfit.fit?,artskyfit.fit?"
    )
    lsc.util.delete("reserr.ar?")
    lsc.util.delete("artlist.co?")
    return arterr2, arterr
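
# The final error estimate above is a one-pass 1-sigma clipped standard
# deviation; a self-contained sketch of just that statistic with plain
# numpy (toy magnitudes, not output of the pipeline above):
import numpy as np

def clipped_std(values):
    """Std of the values strictly within one sigma of the mean (one pass)."""
    a = np.array(values, dtype=float)
    lo, hi = a.mean() - a.std(), a.mean() + a.std()
    return np.std(np.compress((a > lo) & (a < hi), a))

mags = [18.91, 18.95, 18.88, 19.40, 18.93]  # toy recovered magnitudes
print(clipped_std(mags))                    # excludes the 19.40 outlier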
Example #42
def qta(maxscore,
        quota,
        ncands,
        remaining,
        S,
        T,
        use_mj=True,
        use_two_q=False):
    """Quota Threshold approval single-winner method, using either
    Majority Judgment style tie-breaker for the approval quota
    threshold (default) or ER-Bucklin-ratings style
    """
    ratings = dict()
    twoq = quota * 2

    for c in remaining:
        r1q_unset = True
        r1q = 0
        r2q = 0
        tt_surplus = 0.
        ss = S[..., c]
        tt = T[..., c]
        s = 0
        for r in range(maxscore, -1, -1):
            s += ss[r] * r

            if r1q_unset and (tt[r] > quota):
                r1q_unset = False
                r1q = r
                if not use_two_q:
                    # If not using the two-quota average score tie-breaker,
                    # leading part of the AQT score is the quota threshold rating
                    ratings[c] = (r1q, )
                    break

            if tt[r] > twoq:
                r2q = r
                tt_surplus = tt[r2q] - twoq
                break

        if use_two_q:
            # leading part of AQT score is quota threshold rating and average score
            # in the top two quota blocks
            ratings[c] = (r1q, (s - tt_surplus * r) / twoq)
        elif r1q_unset:
            # If not using the two-quota average score tie-breaker,
            # leading part of the AQT score is the quota threshold rating
            ratings[c] = (r1q, )

    scores = np.arange(maxscore + 1)
    if use_mj:  # Majority Judgment style approval quota threshold
        for c in remaining:
            ss = S[..., c]
            tt = T[..., c]
            dd = abs(tt - quota)

            ratings[c] = (*list(ratings[c]), *[(x, tt[x]) for x in np.array(
                sorted(np.compress(ss > 0, scores), key=(lambda x: dd[x])))])
    else:  # ER-Bucklin-ratings style approval quota threshold
        for c in remaining:
            tt = T[..., c]
            ratings[c] = (*list(ratings[c]), tt[r])

    ranking = sorted(remaining, key=(lambda c: ratings[c]), reverse=True)
    winner = ranking[0]
    winsum = T[ratings[winner][0], winner]

    if winsum >= quota:
        factor = (1. - quota / winsum)
    else:
        factor = 0

    return (winner, winsum, factor, ranking, ratings)
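
# The Majority Judgment branch above orders the ratings that actually occur
# on ballots by how close their cumulative tally is to the quota; a toy
# sketch of that ordering for a single candidate (hypothetical tallies):
import numpy as np

maxscore = 5
ss = np.array([2, 1, 4, 3, 2, 1])  # ss[r]: ballots giving rating r
tt = ss[::-1].cumsum()[::-1]       # tt[r]: ballots giving rating >= r
quota = 6.5
scores = np.arange(maxscore + 1)
dd = abs(tt - quota)
# ratings used on at least one ballot, closest-to-quota first
mj_order = sorted(np.compress(ss > 0, scores), key=lambda r: dd[r])
print(mj_order)  # [3, 2, 4, 1, 5, 0]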
Example #43
 def ymax(self, axis='s'):
     c, i = divmod(self.icount - 1, self.samples)
     data = np.compress(self.keep, self.y, axis=1)
     return np.max(data)
Example #44
def compress(condition, a, axis=0):
    return N.compress(condition, a, axis)
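
# A quick usage sketch of this thin wrapper; note that its default axis=0
# differs from numpy's own default (which flattens the input).
import numpy as N

a = N.array([[1, 2], [3, 4], [5, 6]])
cond = N.array([True, False, True])
print(compress(cond, a))  # keeps rows 0 and 2 -> [[1 2] [5 6]]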
Example #45
 # volatility process paths
 v = SRD_generate_paths(x_disc, v0, kappa_v, theta_v,
                         sigma_v, T, M, I, rand, 2, cho_matrix)
 # index level process paths
 S = H93_index_paths(S0, r, v, 1, cho_matrix)
 for K in k_list:  # strikes
     # inner value matrix
     h = np.maximum(K - S, 0)
     # value/cash flow matrix
     V = np.maximum(K - S, 0)
     for t in xrange(M - 1, 0, -1):
         df = np.exp(-(r[t] + r[t + 1]) / 2 * dt)
         # select only ITM paths
         itm = np.greater(h[t], 0)
         relevant = np.nonzero(itm)
         rel_S = np.compress(itm, S[t])
         no_itm = len(rel_S)
         if no_itm == 0:
             cv = np.zeros((I), dtype=np.float)
         else:
             rel_v = np.compress(itm, v[t])
             rel_r = np.compress(itm, r[t])
             rel_V = (np.compress(itm, V[t + 1])
                        * np.compress(itm, df))
             matrix = np.zeros((D + 1, no_itm), dtype=np.float)
             matrix[10] = rel_S * rel_v * rel_r
             matrix[9] = rel_S * rel_v
             matrix[8] = rel_S * rel_r
             matrix[7] = rel_v * rel_r
             matrix[6] = rel_S ** 2
             matrix[5] = rel_v ** 2
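
# The np.compress(itm, ...) calls above restrict the least-squares regression
# to in-the-money paths; a minimal sketch of that selection step on toy
# put-option paths (illustrative values only):
import numpy as np

K = 100.0
S_t = np.array([95.0, 102.0, 88.0, 110.0, 99.0])  # toy index levels at time t
h_t = np.maximum(K - S_t, 0)                      # inner (exercise) value
itm = np.greater(h_t, 0)                          # in-the-money indicator
rel_S = np.compress(itm, S_t)                     # paths entering the regression
print(rel_S)                                      # [95. 88. 99.]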
Example #46
 def average(self, axis='s'):
     """average of the whole curve"""
     c, i = divmod(self.icount - 1, self.samples)
     data = np.compress(self.keep, self.y, axis=1)
     return np.average(data)
Example #47
def plot_3d_comp_Poisson(model,
                         data,
                         vmin=None,
                         vmax=None,
                         resid_range=None,
                         fig_num=None,
                         pop_ids=None,
                         residual='Anscombe',
                         adjust=True):
    """
    Poisson comparison between 3d model and data.


    model: 3-dimensional model SFS
    data: 3-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively.
    resid_range: Residual plot saturates at +- resid_range.
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    """
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    if fig_num is None:
        f = pylab.gcf()
    else:
        f = pylab.figure(fig_num, figsize=(8, 10))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.95, right=0.95)

    modelmax = max(masked_model.sum(axis=sax).max() for sax in range(3))
    datamax = max(masked_data.sum(axis=sax).max() for sax in range(3))
    modelmin = min(masked_model.sum(axis=sax).min() for sax in range(3))
    datamin = min(masked_data.sum(axis=sax).min() for sax in range(3))
    max_toplot = max(modelmax, datamax)
    min_toplot = min(modelmin, datamin)

    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    # Calculate the residuals
    if residual == 'Anscombe':
        resids = [Inference.\
                  Anscombe_Poisson_residual(masked_model.sum(axis=2-sax),
                                            masked_data.sum(axis=2-sax),
                                            mask=vmin) for sax in range(3)]
    elif residual == 'linear':
        resids =[Inference.\
                 linear_Poisson_residual(masked_model.sum(axis=2-sax),
                                         masked_data.sum(axis=2-sax),
                                         mask=vmin) for sax in range(3)]
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    min_resid = min([r.min() for r in resids])
    max_resid = max([r.max() for r in resids])
    if resid_range is None:
        resid_range = max((abs(max_resid), abs(min_resid)))
    resid_extend = _extend_mapping[-resid_range <= min_resid,
                                   resid_range >= max_resid]

    if pop_ids is not None:
        if len(pop_ids) != 3:
            raise ValueError('pop_ids must be of length 3.')
        data_ids = model_ids = resid_ids = pop_ids
    else:
        data_ids = masked_data.pop_ids
        model_ids = masked_model.pop_ids

        if model_ids is None:
            model_ids = data_ids

        if model_ids == data_ids:
            resid_ids = model_ids
        else:
            resid_ids = None

    for sax in range(3):
        marg_data = masked_data.sum(axis=2 - sax)
        marg_model = masked_model.sum(axis=2 - sax)

        curr_ids = []
        for ids in [data_ids, model_ids, resid_ids]:
            if ids is None:
                ids = ['pop0', 'pop1', 'pop2']

            if ids is not None:
                ids = list(ids)
                del ids[2 - sax]

            curr_ids.append(ids)

        ax = pylab.subplot(4, 3, sax + 1)
        plot_colorbar = (sax == 2)
        plot_single_2d_sfs(marg_data,
                           vmin=vmin,
                           vmax=vmax,
                           pop_ids=curr_ids[0],
                           extend=extend,
                           colorbar=plot_colorbar)

        pylab.subplot(4, 3, sax + 4, sharex=ax, sharey=ax)
        plot_single_2d_sfs(marg_model,
                           vmin=vmin,
                           vmax=vmax,
                           pop_ids=curr_ids[1],
                           extend=extend,
                           colorbar=False)

        resid = resids[sax]
        pylab.subplot(4, 3, sax + 7, sharex=ax, sharey=ax)
        plot_2d_resid(resid,
                      resid_range,
                      pop_ids=curr_ids[2],
                      extend=resid_extend,
                      colorbar=plot_colorbar)

        ax = pylab.subplot(4, 3, sax + 10)
        flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                                   resid.ravel())
        ax.hist(flatresid, bins=20, density=True)
        ax.set_yticks([])
    pylab.show()
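
# The histogram panels above keep only the unmasked residuals; the same
# compress/logical_not idiom on a toy masked array:
import numpy

resid = numpy.ma.masked_array([0.5, -1.2, 2.0, 0.1],
                              mask=[False, True, False, False])
flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                           resid.ravel())
print(flatresid)  # [0.5 2.0 0.1]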
Example #48
    def set_data(self, data, **args):
        if args.get("skipIfSame", 1):
            if checksum(data) == checksum(self.raw_data):
                return

        self.domain_data_stat = []
        self.attr_values = {}
        self.original_data = None
        self.scaled_data = None
        self.no_jittering_scaled_data = None
        self.valid_data_array = None

        self.raw_data = None
        self.have_data = False
        self.data_has_class = False
        self.data_has_continuous_class = False
        self.data_has_discrete_class = False
        self.data_class_name = None
        self.data_domain = None
        self.data_class_index = None

        if data is None:
            return
        full_data = data
        self.raw_data = data

        len_data = data and len(data) or 0

        self.attribute_names = [attr.name for attr in full_data.domain]
        self.attribute_name_index = dict([
            (full_data.domain[i].name, i) for i in range(len(full_data.domain))
        ])
        self.attribute_flip_info = {}

        self.data_domain = full_data.domain
        self.data_has_class = bool(full_data.domain.class_var)
        self.data_has_continuous_class = full_data.domain.has_continuous_class
        self.data_has_discrete_class = full_data.domain.has_discrete_class

        self.data_class_name = self.data_has_class and full_data.domain.class_var.name
        if self.data_has_class:
            self.data_class_index = self.attribute_name_index[
                self.data_class_name]
        self.have_data = bool(self.raw_data and len(self.raw_data) > 0)

        self.domain_data_stat = getCached(full_data, DomainBasicStats,
                                          (full_data, ))

        sort_values_for_discrete_attrs = args.get(
            "sort_values_for_discrete_attrs", 1)

        for index in range(len(full_data.domain)):
            attr = full_data.domain[index]
            if attr.is_discrete:
                self.attr_values[attr.name] = [0, len(attr.values)]
            elif attr.is_continuous:
                self.attr_values[attr.name] = [
                    self.domain_data_stat[index].min,
                    self.domain_data_stat[index].max
                ]

        if 'no_data' in args:
            return

        # the original_data, no_jittering_scaled_data and validArray are arrays
        # that we can cache so that other visualization widgets don't need to
        # compute it. The scaled_data on the other hand has to be computed for
        # each widget separately because of different
        # jitter_continuous and jitter_size values
        if getCached(data, "visualizationData"):
            self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached(
                data, "visualizationData")
        else:
            no_jittering_data = np.c_[full_data.X, full_data.Y].T
            # comparing against np.nan is always True; use isnan instead
            valid_data_array = np.logical_not(np.isnan(no_jittering_data))
            original_data = no_jittering_data.copy()

            for index in range(len(data.domain)):
                attr = data.domain[index]
                if attr.is_discrete:
                    # see if the values for discrete attributes have to be resorted
                    variable_value_indices = get_variable_value_indices(
                        data.domain[index], sort_values_for_discrete_attrs)
                    if 0 in [
                            i == variable_value_indices[attr.values[i]]
                            for i in range(len(attr.values))
                    ]:
                        # make the array contiguous, otherwise the putmask
                        # function does not work
                        line = no_jittering_data[index].copy()
                        indices = [
                            np.where(line == val, 1, 0)
                            for val in range(len(attr.values))
                        ]
                        for i in range(len(attr.values)):
                            np.putmask(line, indices[i],
                                       variable_value_indices[attr.values[i]])
                        no_jittering_data[
                            index] = line  # save the changed array
                        original_data[
                            index] = line  # reorder also the values in the original data
                    no_jittering_data[index] = (
                        (no_jittering_data[index] * 2.0 + 1.0) /
                        float(2 * len(attr.values)))

                elif attr.is_continuous:
                    diff = self.domain_data_stat[
                        index].max - self.domain_data_stat[
                            index].min or 1  # if all values are the same then prevent division by zero
                    no_jittering_data[index] = (
                        no_jittering_data[index] -
                        self.domain_data_stat[index].min) / diff

            self.original_data = original_data
            self.no_jittering_scaled_data = no_jittering_data
            self.valid_data_array = valid_data_array

        if data:
            setCached(data, "visualizationData",
                      (self.original_data, self.no_jittering_scaled_data,
                       self.valid_data_array))

        # compute the scaled_data arrays
        scaled_data = self.no_jittering_scaled_data

        # Random generators for jittering
        random = np.random.RandomState(seed=self.jitter_seed)
        # random_integers is deprecated; randint's upper bound is exclusive
        rand_seeds = random.randint(0,
                                    2**30,
                                    size=len(data.domain))
        for index, rseed in zip(list(range(len(data.domain))), rand_seeds):
            # Need to use a different seed for each feature
            random = np.random.RandomState(seed=rseed)
            attr = data.domain[index]
            if attr.is_discrete:
                scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \
                                      (random.rand(len(full_data)) - 0.5)

            elif attr.is_continuous and self.jitter_continuous:
                scaled_data[index] += self.jitter_size / 50.0 * (
                    0.5 - random.rand(len(full_data)))
                scaled_data[index] = np.absolute(
                    scaled_data[index])  # fix values below zero
                ind = np.where(scaled_data[index] > 1.0, 1,
                               0)  # fix values above 1
                # putmask indexes its value array by absolute position, so
                # pass the full reflected array rather than a compressed one
                np.putmask(scaled_data[index], ind,
                           2.0 - scaled_data[index])

        self.scaled_data = scaled_data[:, :len_data]
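
# The last jitter step folds any value pushed above 1.0 back into range;
# a standalone sketch of that reflection (boolean indexing here is
# equivalent to the putmask call above):
import numpy as np

scaled = np.array([0.10, 0.98, 1.07, 1.20])  # toy jittered values
scaled = np.absolute(scaled)                 # fix values below zero
over = scaled > 1.0
scaled[over] = 2.0 - scaled[over]            # reflect values above one
print(scaled)                                # [0.1  0.98 0.93 0.8 ]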
Example #49
def plot_2d_comp_Poisson(model,
                         data,
                         vmin=None,
                         vmax=None,
                         resid_range=None,
                         fig_num=None,
                         pop_ids=None,
                         residual='Anscombe',
                         adjust=True,
                         saveplot=False,
                         nomplot="plot_2d_comp_Poisson",
                         showplot=True):
    """
    Poisson comparison between 2d model and data.


    model: 2-dimensional model SFS
    data: 2-dimensional data SFS
    vmin, vmax: Minimum and maximum values plotted for sfs are vmin and
                vmax respectively.
    resid_range: Residual plot saturates at +- resid_range.
    fig_num: Clear and use figure fig_num for display. If None, a new figure
             window is created.
    pop_ids: If not None, override pop_ids stored in Spectrum.
    residual: 'Anscombe' for Anscombe residuals, which are more normally
              distributed for Poisson sampling. 'linear' for the linear
              residuals, which can be less biased.
    adjust: Should method use automatic 'subplots_adjust'? For advanced
            manipulation of plots, it may be useful to make this False.
    saveplot: If True, save the figure as <nomplot>.pdf.
    nomplot: Output file name stem used when saveplot is True.
    showplot: If True, display the figure with pylab.show().
    """
    if data.folded and not model.folded:
        model = model.fold()

    masked_model, masked_data = Numerics.intersect_masks(model, data)

    if fig_num is None:
        f = pylab.gcf()
    else:
        f = pylab.figure(fig_num, figsize=(7, 7))

    pylab.clf()
    if adjust:
        pylab.subplots_adjust(bottom=0.07,
                              left=0.07,
                              top=0.94,
                              right=0.95,
                              hspace=0.26,
                              wspace=0.26)

    max_toplot = max(masked_model.max(), masked_data.max())
    min_toplot = min(masked_model.min(), masked_data.min())
    if vmax is None:
        vmax = max_toplot
    if vmin is None:
        vmin = min_toplot
    extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot]

    if pop_ids is not None:
        data_pop_ids = model_pop_ids = resid_pop_ids = pop_ids
        if len(pop_ids) != 2:
            raise ValueError('pop_ids must be of length 2.')
    else:
        data_pop_ids = masked_data.pop_ids
        model_pop_ids = masked_model.pop_ids
        if masked_model.pop_ids is None:
            model_pop_ids = data_pop_ids

        if model_pop_ids == data_pop_ids:
            resid_pop_ids = model_pop_ids
        else:
            resid_pop_ids = None

    ax = pylab.subplot(2, 2, 1)
    plot_single_2d_sfs(masked_data,
                       vmin=vmin,
                       vmax=vmax,
                       pop_ids=data_pop_ids,
                       colorbar=False)
    ax.set_title('data')

    ax2 = pylab.subplot(2, 2, 2, sharex=ax, sharey=ax)
    plot_single_2d_sfs(masked_model,
                       vmin=vmin,
                       vmax=vmax,
                       pop_ids=model_pop_ids,
                       extend=extend)
    ax2.set_title('model')

    if residual == 'Anscombe':
        resid = Inference.Anscombe_Poisson_residual(masked_model,
                                                    masked_data,
                                                    mask=vmin)
    elif residual == 'linear':
        resid = Inference.linear_Poisson_residual(masked_model,
                                                  masked_data,
                                                  mask=vmin)
    else:
        raise ValueError("Unknown class of residual '%s'." % residual)

    if resid_range is None:
        resid_range = max((abs(resid.max()), abs(resid.min())))
    resid_extend = _extend_mapping[-resid_range <= resid.min(),
                                   resid_range >= resid.max()]

    ax3 = pylab.subplot(2, 2, 3, sharex=ax, sharey=ax)
    plot_2d_resid(resid,
                  resid_range,
                  pop_ids=resid_pop_ids,
                  extend=resid_extend)
    ax3.set_title('residuals')

    ax = pylab.subplot(2, 2, 4)
    flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()),
                               resid.ravel())
    ax.hist(flatresid, bins=20, density=True)
    ax.set_title('residuals')
    ax.set_yticks([])
    if saveplot:
        nomplot = nomplot + ".pdf"
        pylab.savefig(nomplot)
    if showplot:
        pylab.show()
Example #50
# ==== Set the integration method ====
mim = gf.MeshIm(m, gf.Integ('IM_TETRAHEDRON(5)'))

# ==== Summary ====
print(' ==================================== \n Mesh details: ')
print(' Problem dimension:', mfu.qdim(), '\n Number of elements: ', m.nbcvs(),
      '\n Number of nodes: ', m.nbpts())
print(' Number of dof: ', mfu.nbdof(), '\n Element type: ',
      mfu.fem()[0].char())
print(' ====================================')

# ==== Boundaries detection ====
allPoints = m.pts()
# Bottom points and faces
cbot = (abs(allPoints[2, :]) < 1e-6)
pidbot = np.compress(cbot, list(range(0, m.nbpts())))
fbot = m.faces_from_pid(pidbot)
BOTTOM = 1
m.set_region(BOTTOM, fbot)
# Top points and faces
ctop = (abs(allPoints[2, :]) > dimZ - stepZ)
pidtop = np.compress(ctop, list(range(0, m.nbpts())))
ftop = m.faces_from_pid(pidtop)
TOP = 2
m.set_region(TOP, ftop)
# Left points and faces
cleft = (abs(allPoints[0, :]) < 1e-6)
pidleft = np.compress(cleft, list(range(0, m.nbpts())))
fleft = m.faces_from_pid(pidleft)
LEFT = 3
m.set_region(LEFT, fleft)
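
# The remaining boundaries follow the same pattern; e.g. a sketch for a
# right-hand face, assuming a hypothetical extent dimX and mesh step stepX
# analogous to the dimZ/stepZ used above:
cright = (abs(allPoints[0, :]) > dimX - stepX)
pidright = np.compress(cright, list(range(0, m.nbpts())))
fright = m.faces_from_pid(pidright)
RIGHT = 4
m.set_region(RIGHT, fright)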
Example #51
 def from_contingency(self, cont, nan_adjustment):
     h_class = _entropy(np.sum(cont, axis=1))
     h_residual = _entropy(np.compress(np.sum(cont, axis=0), cont, axis=1))
     return nan_adjustment * (h_class - h_residual)
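
# The np.compress call above drops the all-zero columns of the contingency
# table before computing the residual entropy (an integer condition is
# treated as nonzero == keep); a toy illustration:
import numpy as np

cont = np.array([[3, 0, 2],
                 [1, 0, 4]])
print(np.compress(np.sum(cont, axis=0), cont, axis=1))
# [[3 2]
#  [1 4]]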
Example #52
def main(proteinfilename, ligandfilename):

    U1 = MDAnalysis.Universe(proteinfilename)
    proteins = U1.select_atoms('protein and not type HD')
    proteincoods = proteins.positions

    U2 = MDAnalysis.Universe(ligandfilename)
    polaratoms = U2.select_atoms(
        'type N or type O or type F or type Cl or type Br')

    numpolaratoms = polaratoms.n_atoms

    Z = int(os.path.isfile('placedwaters.pdb'))
    if Z == 0:
        f1 = open('dockedwaters.pdb', 'w')
        f1.close()
        sys.exit('Empty file')

    U3 = MDAnalysis.Universe('placedwaters.pdb')
    trialwaters = U3.select_atoms('resname SOL and name OW')
    trialwatercoods = trialwaters.positions

    numtrialwaters = trialwatercoods.shape[0]
    waterscores = np.zeros((numtrialwaters), dtype=float)

    tempdist = MDAnalysis.lib.distances.distance_array(trialwatercoods,
                                                       proteincoods)
    watprodist = np.amin(tempdist, axis=1)

    for i in xrange(0, numtrialwaters):

        if watprodist[i] < 3.6 and watprodist[i] > 2.00:

            comd = 'vina --receptor ' + proteinfilename + ' --num_modes 1 --exhaustiveness 20 --ligand water.pdbqt --size_x 0.5 --size_y 0.5 --size_z 0.5 --out waterout.pdbqt --center_x ' + str(
                trialwatercoods[i, 0]) + ' --center_y ' + str(
                    trialwatercoods[i, 1]) + ' --center_z ' + str(
                        trialwatercoods[i, 2])
            os.system(comd)
            os.system("grep 'RESULT' waterout.pdbqt > water.txt")
            A = np.genfromtxt('water.txt', usecols=3, dtype=float)
            waterscores[i] = A
            os.remove('water.txt')
            os.remove('waterout.pdbqt')

    predictedwatercoods = np.compress(waterscores <= -0.6,
                                      trialwatercoods,
                                      axis=0)
    predictedwatercoods = np.float32(predictedwatercoods)
    numpredictedwaters = predictedwatercoods.shape[0]

    waterdata = np.genfromtxt('waterdetails.txt', dtype=int)
    predictedwaterscores1 = np.compress(waterscores <= -0.6,
                                        waterscores,
                                        axis=0)
    predictedwaterscores2 = np.reshape(predictedwaterscores1,
                                       (numpredictedwaters, 1))

    ##############################################################################################################
    ##############################################################################################################
    if numpredictedwaters > 1:
        fit = scipy.cluster.hierarchy.fclusterdata(predictedwatercoods,
                                                   2.0,
                                                   criterion='distance',
                                                   metric='euclidean')
        fit = fit.astype(int)
        numclust = np.max(fit)

        temppredictedwatercoods = np.zeros((numclust, 3), dtype=float)
        temppredictedwatercoods = np.float32(temppredictedwatercoods)
        temppredictedwaterscores = np.zeros((numclust, 1), dtype=float)

        for i in xrange(1, numclust + 1):
            clusttemp = np.compress(fit == i, predictedwatercoods, axis=0)
            tempavg = np.mean(clusttemp, axis=0)
            temppredictedwatercoods[i - 1, :] = tempavg

            clusttemp2 = np.compress(fit == i, predictedwaterscores2, axis=0)
            tempavg2 = np.mean(clusttemp2, axis=0)
            temppredictedwaterscores[i - 1, 0] = tempavg2

    elif numpredictedwaters <= 1:
        temppredictedwatercoods = predictedwatercoods.copy()
        temppredictedwaterscores = predictedwaterscores2.copy()

    ##############################################################################################################
    ##############################################################################################################

    allligand = U2.select_atoms('all')
    allligandcoods = allligand.positions
    numpredictedwaters = temppredictedwatercoods.shape[0]

    discardindex = np.zeros((numpredictedwaters, 1), dtype=float)

    count = 0
    for i in range(0, numpolaratoms):

        if waterdata.size > 2:
            atomindex = waterdata[i, 0]
            allowedwaters = waterdata[i, 1]

        elif waterdata.size == 2:
            atomindex = waterdata[0]
            allowedwaters = waterdata[1]

        atomcoods = np.zeros((1, 3), dtype=float)
        atomcoods[0, :] = allligandcoods[atomindex, :].copy()
        atomcoods = np.float32(atomcoods)

        atwatdist = MDAnalysis.lib.distances.distance_array(
            temppredictedwatercoods, atomcoods)
        B = np.where(atwatdist < 3.1)
        mates = np.ravel_multi_index(B, atwatdist.shape)
        nummates = np.size(mates)

        matescores = temppredictedwaterscores[mates]

        if nummates > allowedwaters:

            numdiscardedwaters = nummates - allowedwaters
            for j in xrange(0, numdiscardedwaters):

                high = np.argmax(matescores)
                removedindex = mates[high]
                matescores = np.delete(matescores, high)
                mates = np.delete(mates, high)
                discardindex[count, 0] = removedindex
                count = count + 1

    trimmeddiscardindex = discardindex[0:count, :].copy()
    trimmeddiscardindex = np.transpose(trimmeddiscardindex)
    trimmeddiscardindex = np.ndarray.astype(trimmeddiscardindex, dtype=int)

    clusteredwatercoods = np.delete(temppredictedwatercoods,
                                    trimmeddiscardindex,
                                    axis=0)
    finalwaterscores = np.delete(temppredictedwaterscores,
                                 trimmeddiscardindex,
                                 axis=0)

    writewaterfile('predictedwaters.pdb', clusteredwatercoods,
                   finalwaterscores)
Example #53
def despine(fig=None, ax=None, top=True, right=True, left=False,
            bottom=False, offset=None, trim=False):
    """Remove the top and right spines from plot(s).

    fig : matplotlib figure, optional
        Figure to despine all axes of, default uses current figure.
    ax : matplotlib axes, optional
        Specific axes object to despine.
    top, right, left, bottom : boolean, optional
        If True, remove that spine.
    offset : int or dict, optional
        Absolute distance, in points, spines should be moved away
        from the axes (negative values move spines inward). A single value
        applies to all spines; a dict can be used to set offset values per
        side.
    trim : bool, optional
        If True, limit spines to the smallest and largest major tick
        on each non-despined axis.

    Returns
    -------
    None

    """
    # Get references to the axes we want
    if fig is None and ax is None:
        axes = plt.gcf().axes
    elif fig is not None:
        axes = fig.axes
    elif ax is not None:
        axes = [ax]

    for ax_i in axes:
        for side in ["top", "right", "left", "bottom"]:
            # Toggle the spine objects
            is_visible = not locals()[side]
            ax_i.spines[side].set_visible(is_visible)
            if offset is not None and is_visible:
                try:
                    val = offset.get(side, 0)
                except AttributeError:
                    val = offset
                _set_spine_position(ax_i.spines[side], ('outward', val))

        # Potentially move the ticks
        if left and not right:
            maj_on = any(
                t.tick1line.get_visible()
                for t in ax_i.yaxis.majorTicks
            )
            min_on = any(
                t.tick1line.get_visible()
                for t in ax_i.yaxis.minorTicks
            )
            ax_i.yaxis.set_ticks_position("right")
            for t in ax_i.yaxis.majorTicks:
                t.tick2line.set_visible(maj_on)
            for t in ax_i.yaxis.minorTicks:
                t.tick2line.set_visible(min_on)

        if bottom and not top:
            maj_on = any(
                t.tick1line.get_visible()
                for t in ax_i.xaxis.majorTicks
            )
            min_on = any(
                t.tick1line.get_visible()
                for t in ax_i.xaxis.minorTicks
            )
            ax_i.xaxis.set_ticks_position("top")
            for t in ax_i.xaxis.majorTicks:
                t.tick2line.set_visible(maj_on)
            for t in ax_i.xaxis.minorTicks:
                t.tick2line.set_visible(min_on)

        if trim:
            # clip off the parts of the spines that extend past major ticks
            xticks = ax_i.get_xticks()
            if xticks.size:
                firsttick = np.compress(xticks >= min(ax_i.get_xlim()),
                                        xticks)[0]
                lasttick = np.compress(xticks <= max(ax_i.get_xlim()),
                                       xticks)[-1]
                ax_i.spines['bottom'].set_bounds(firsttick, lasttick)
                ax_i.spines['top'].set_bounds(firsttick, lasttick)
                newticks = xticks.compress(xticks <= lasttick)
                newticks = newticks.compress(newticks >= firsttick)
                ax_i.set_xticks(newticks)

            yticks = ax_i.get_yticks()
            if yticks.size:
                firsttick = np.compress(yticks >= min(ax_i.get_ylim()),
                                        yticks)[0]
                lasttick = np.compress(yticks <= max(ax_i.get_ylim()),
                                       yticks)[-1]
                ax_i.spines['left'].set_bounds(firsttick, lasttick)
                ax_i.spines['right'].set_bounds(firsttick, lasttick)
                newticks = yticks.compress(yticks <= lasttick)
                newticks = newticks.compress(newticks >= firsttick)
                ax_i.set_yticks(newticks)
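
# The trim branch clamps each spine to the first and last major ticks that
# fall inside the axis limits; the core of that computation on toy ticks:
import numpy as np

xticks = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
xlim = (0.5, 3.5)
firsttick = np.compress(xticks >= min(xlim), xticks)[0]   # 1.0
lasttick = np.compress(xticks <= max(xlim), xticks)[-1]   # 3.0
print(firsttick, lasttick)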
Example #54
def main():

    parser = argparse.ArgumentParser(
        description=
        'This script takes a single zarr zipstore and estimates the contamination rate, providing a '
        'log-likelihood ratio vs the null model')

    parser.add_argument(
        '--input',
        required=True,
        help=
        'Path to zarr file containing genotypes and allele depths, a zipped Zarr file with data for a single sample. '
        'This should follow the standard format of {sample}/{seqid}/calldata/GT and {sample}/{seqid}/calldata/AD.'
    )

    parser.add_argument(
        '--sites',
        required=True,
        help=
        'Path to zarr describing which sites in `input` were genotyped. This is used to match the `input` to the '
        'allele frequencies below. variants/POS is required.')

    parser.add_argument(
        '--allele-frequencies',
        required=True,
        help=
        'Path to zarr file describing allele frequencies. This has two purposes: 1) to select SNPs to downsample '
        'to, based on the `minimum_af` argument. 2) To provide a prior expectation on the frequency of genotypes. '
        'The first level of the zarr file should be groups for seqids, with each containing `POS` (position) and '
        'AF (allele frequencies). The shape of the AF array must be Nx4, where N is the size of the 1D POS array. '
        'The order of alleles *must* correspond to the coding in the input data. There is no requirement to have a '
        'similar shape to the input genotypes, although a minimum level of intersection is required!'
    )

    parser.add_argument('--seqid',
                        required=True,
                        nargs='+',
                        help='name of chromosome(s) or contig(s) to process. ')

    parser.add_argument('--output',
                        required=True,
                        help='path to output file stem')

    parser.add_argument('--downsample',
                        required=False,
                        default=20000,
                        help='number of sites to consider.',
                        type=int)

    parser.add_argument(
        '--minimum-af',
        required=False,
        default=0.05,
        help=
        'minimum minor allele frequency in reference population to consider. Sites with higher MAF are more '
        'powerful at detecting contamination',
        type=float)

    parser.add_argument('--sequence-error-rate',
                        required=False,
                        default=1e-3,
                        help='probability of observing a non REF/ALT base',
                        type=float)

    parser.add_argument(
        '--minimum-coverage',
        required=False,
        default=10,
        help=
        'minimum read depth to use. Low depths have low power to detect contamination',
        type=int)

    parser.add_argument('--plot', dest='plot', action='store_true')
    parser.add_argument('--no-plot', dest='plot', action='store_false')

    parser.add_argument('--log', dest='log', action='store_true')
    parser.add_argument('--no-log', dest='log', action='store_false')

    parser.set_defaults(plot=True, log=False)

    try:
        args = {
            "input": snakemake.input.input,
            "sites": snakemake.input.sites,
            "allele_frequencies": snakemake.input.allele_frequencies,
            "seqid": snakemake.params.seqid,
            "output": snakemake.params.stem,
            "minimum_af": snakemake.params.minimum_af,
            "minimum_coverage": snakemake.params.minimum_coverage,
            "sequence_error_rate": snakemake.params.seq_err_rate,
            "downsample": snakemake.params.downsample,
            "plot": snakemake.params.plot,
            "log": snakemake.params.log
        }
        log("Args read via snakemake")
    except NameError:
        args = vars(parser.parse_args())
        log("Args read via command line")

    seqids = args['seqid']
    sequence_error_rate = args['sequence_error_rate']
    downsample_n = args["downsample"]
    minimum_minor_af = args["minimum_af"]

    output_csv = args['output'] + ".contamination.csv"
    output_png = args['output'] + ".allele_balance.png"
    output_log = args["output"] + ".{alpha}.log"

    sample_store = zarr.ZipStore(args["input"], mode="r")
    sample_callset = zarr.Group(sample_store)

    sites = zarr.ZipStore(args["sites"], mode="r")
    variant_sites = zarr.Group(sites)
    sample = next(iter(sample_callset))

    concatenated_sample_callset, _ = concatenate_arrays(
        sample_callset[sample], seqids, paths=["calldata/GT", "calldata/AD"])

    gt = allel.GenotypeArray(concatenated_sample_callset["calldata/GT"])
    ad = concatenated_sample_callset["calldata/AD"]

    concatenated_sites, concatenated_site_shapes = concatenate_arrays(
        variant_sites, seqids, ["variants/POS"])
    pos = concatenated_sites["variants/POS"]
    assert pos.shape[0] == gt.shape[0] == ad.shape[
        0], "Shape inconsistency. {0}, {1}, {2}".format(
            pos.shape, gt.shape, ad.shape)

    # load allele frequencies required to compute weights
    allele_frequencies_z = zarr.open_group(args['allele_frequencies'], "r")
    concatenated_af_arrays, concatenated_af_shapes = concatenate_arrays(
        allele_frequencies_z, seqids, ["POS", "AF"])
    af_pos = concatenated_af_arrays["POS"]
    # This is a 2D array of the frequency of the ALT allele in some other dataset.
    af_val = concatenated_af_arrays["AF"]
    assert af_val.shape[
        1] == 4, "Allele frequencies must contain all 4 alleles, even if unobserved."

    # for the sample_gt: Keep if
    # a) in af, b) is_called and c) is_biallelic
    # step 1 find the intersection this works on multi indexes
    loc_gt, loc_af = locate_intersection(pos, concatenated_site_shapes, af_pos,
                                         concatenated_af_shapes)

    flt_af_val = np.compress(loc_af, af_val, axis=0)
    flt_gt = np.compress(loc_gt, gt, axis=0)
    flt_ad = np.compress(loc_gt, ad, axis=0)

    # now we need to filter both by is biallelic and is called.
    is_bial_ref_pop = np.count_nonzero(flt_af_val, axis=1) == 2
    is_called = flt_gt.is_called()[:, 0]

    # compress the intersection by the AND of these
    keep_loc = is_called & is_bial_ref_pop
    alt_frequency_pass = np.compress(keep_loc, flt_af_val, axis=0)
    allele_depth_pass = np.compress(keep_loc, flt_ad, axis=0)

    # recode the allele depth to 0/1.
    # find the "alt" column.
    log("Ordering alleles by frequency for REF/ALT/ERR")
    min_cov_reached = allele_depth_pass[:, 0].sum(
        axis=1) >= args['minimum_coverage']

    ix_cols_sort = np.argsort(alt_frequency_pass, axis=1)[:, ::-1]

    # indices of all rows
    ix_rows = np.arange(alt_frequency_pass.shape[0])

    # apply the sorting operation
    allele_depth_pass_reordered = np.squeeze(allele_depth_pass)[
        ix_rows[:, np.newaxis], ix_cols_sort]

    # Define allele counts: sum final 2 columns, representing ref/alt/error
    allele_depths = allele_depth_pass_reordered[:, :3]
    allele_depths[:,
                  2] = allele_depths[:, 2] + allele_depth_pass_reordered[:, 3]
    assert allele_depths.shape[1] == 3

    # issue with some samples having a third allele (ie not in phase 2) discovered at high frequency
    # Filter sites where more than 10% of reads look like errors.
    probably_biallelic = allele_depth_pass_reordered[:, 2] < (
        .1 * allele_depth_pass_reordered.sum(axis=1))

    # step 2 create the 0/1/2 from the allele frequencies.
    major_af = alt_frequency_pass.max(axis=1)

    # select the values with the highest MAF.
    log("Selecting variants on which to perform analysis")
    while True:
        eligible = probably_biallelic & min_cov_reached & (
            (1 - major_af) > minimum_minor_af)
        if eligible.sum() > downsample_n:
            break

        minimum_minor_af -= 0.01
        if minimum_minor_af < 0:
            log("Insufficient variants meet criteria to compute contamination. n={0}, min={1}"
                .format(eligible.sum(), downsample_n))
            break

    res = pd.DataFrame(index=[sample], columns=["LLR", "LL", "pc_contam"])

    if eligible.sum() > downsample_n:
        log("Downsample from {0} to {1}".format(eligible.sum(), downsample_n))

        ix_ds = np.sort(
            np.random.choice(np.where(eligible)[0], size=downsample_n))

        major_af = np.take(major_af, ix_ds, axis=0)
        allele_depths = np.take(allele_depths, ix_ds, axis=0)

        genotype_weights = np.log(determine_weights(major_af))

        log("estimating contamination...")
        xv = minimize_scalar(compute_likelihood,
                             args=(sequence_error_rate, allele_depths,
                                   genotype_weights, args["log"], output_log),
                             bounds=(0, 0.5),
                             method="Bounded",
                             options={"xatol": 1e-6})

        # compute the likelihood at alpha = 0, to report likelihood ratio.
        null = compute_likelihood(0.0, sequence_error_rate, allele_depths,
                                  genotype_weights, args["log"], output_log)

        # return the llr / ll / estimate
        res.loc[sample] = -min(xv.fun - null, 0), -xv.fun, xv.x * 100

        if args['plot']:
            plot_allele_balance(flt_gt, flt_ad, output_png, res.iloc[0])

    res.to_csv(output_csv)
Example #55
            n3 = np.sum(np.where(dr > t2, 1, 0))
            assert npks == n2, "debug scoring"
            assert n3 == len(all_gvecs) - npks, "debug scoring"
            print(l.r2c)
            print("Unit cell:", (6 * "%.6f ") % indexing.ubitocellpars(l.r2c))

            # Put this grain in the output list
            ubis.append(l.r2c)

            # Remove from the gvectors
            drlv2 = indexing.calc_drlv2(l.r2c, cur_gvecs)
            # print drlv2[:20]
            # print cur_gvecs.shape
            # print drlv2.shape,drlv2[:10],options.tol*options.tol

            cur_gvecs = rc_array.rc_array(np.compress(
                drlv2 > options.tol * options.tol, cur_gvecs, axis=0),
                                          direction='row')
            print("Lattice found, indexes", npks, "from all", all)
            print("Number of unindexed peaks remaining %d" % (len(cur_gvecs)))
            print("Current vector shape", cur_gvecs.shape)
        if len(ubis) > 0:
            indexing.write_ubi_file(options.outfile, ubis)
            print("Wrote to file", options.outfile)
        else:
            print("No unit cell found, sorry, please try again")
    except:
        if len(ubis) > 0:
            indexing.write_ubi_file(options.outfile, ubis)
            print("Wrote to file", options.outfile)
        raise
Example #56
    def run_one_perf_test(self):
        """Method to run a neutral benchmark on a uniform model with the previous selected feature set.
            We want to see which feature set is the best or has the most information.

            
            """
        # Get data
        traindata, testdata = self.trainData, self.testData

        X, y = traindata
        # Check if feature set as more than one feature
        if np.array(self.featset).ndim > 1:
            self.featset = self.featset[0]
        # reduce our data set to the selected features
        X = np.compress(self.featset, X, axis=1)

        # Neutral model
        model = sklearn.linear_model.LogisticRegression(
            multi_class="multinomial", solver="newton-cg", fit_intercept=True
        )
        tuned_parameters = [{"C": np.logspace(-6, 4, 11)}]
        cv = 5

        # We use gridsearch to find good parameters
        gridsearch = sklearn.model_selection.GridSearchCV(
            model, tuned_parameters, scoring=None, n_jobs=1, cv=cv, iid=True
        )
        gridsearch.fit(X, y)
        est = gridsearch.best_estimator_

        # Record scores
        trainScore = est.score(X, y)
        traindec = est.decision_function(X)
        trainpredict = est.predict(X)
        trainy = y
        # Scores on the testset
        X_test, y_test = testdata
        X_test = np.compress(self.featset, X_test, axis=1)

        testpredict = est.predict(X_test)

        testscore = est.score(X_test, y_test)

        # We save the decision function, we can calculate ROC curves later in the analysis
        # TODO: do we need the decision function?, are there alternatives for ord. Regr?
        testdec = est.decision_function(X_test)
        testy = y_test

        result = Result_Performance(
            modelname=self.modelname,
            setname=self.setname,
            trainScore=trainScore,
            testScore=testscore,
            traindec=traindec,
            trainpredict=trainpredict,
            trainy=trainy,
            testdec=testdec,
            testpredict=testpredict,
            testy=testy,
        )
        self.result_performance = result

        return self
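
# np.compress(self.featset, X, axis=1) treats the binary feature mask as a
# column selector; a toy sketch of that reduction step:
import numpy as np

X = np.arange(12).reshape(4, 3)         # 4 samples, 3 features
featset = np.array([1, 0, 1])           # binary mask over features
print(np.compress(featset, X, axis=1))  # keeps feature columns 0 and 2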
Example #57
def cokernel(A, tol=1e-5):
    """Orthonormal basis of the cokernel (left null space) of A."""
    u, s, vh = np.linalg.svd(A)
    # pad the singular values with zeros so every column of u gets one;
    # singular values are real, so a float array suffices (np.complex is deprecated)
    sing = np.zeros(u.shape[1])
    sing[:s.size] = s
    null_mask = (sing <= tol)
    return np.compress(null_mask, u, axis=1)
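
# A quick check of the helper above on a toy rank-deficient matrix:
import numpy as np

A = np.array([[1.0, 2.0],
              [2.0, 4.0]])          # rank 1, so the cokernel is 1-dimensional
basis = cokernel(A)
print(basis.shape)                  # (2, 1)
print(np.allclose(basis.T @ A, 0))  # True: basis annihilates A from the left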
Example #58
    def clip_data(self, data):
        """ Returns a list of data values that are within the range.

        Implements AbstractDataRange.
        """
        return compress(self.mask_data(data), data, axis=0)
Example #59
def vizq(_ra, _dec, catalogue, radius):
    ''' Query vizquery '''
    _site = 'vizier.u-strasbg.fr'
    cat = {
        'usnoa2': ['I/252/out', 'USNO-A2.0', 'Rmag'],
        '2mass': ['II/246/out', '2MASS', 'Jmag'],
        'landolt': ['II/183A/table2', '', 'Vmag,B-V,U-B,V-R,R-I,Star,e_Vmag'],
        'ucac4': [
            'I/322A/out', '',
            'Bmag,Vmag,gmag,rmag,imag,e_Vmag,e_Bmag,e_gmag,e_rmag,e_imag,UCAC4'
        ],
        'apass': [
            'II/336/apass9', '',
            "Bmag,Vmag,g'mag,r'mag,i'mag,e_Vmag,e_Bmag,e_g'mag,e_r'mag,e_i'mag"
        ],
        'usnob1': ['I/284/out', 'USNO-B1.0', 'R2mag'],
        'sdss9': [
            'V/139/sdss9', '',
            'objID,umag,gmag,rmag,imag,zmag,e_umag,e_gmag,e_rmag,e_imag,e_zmag,gc'
        ],
        'sdss7': [
            'II/294/sdss7', '',
            'objID,umag,gmag,rmag,imag,zmag,e_umag,e_gmag,e_rmag,e_imag,e_zmag,gc'
        ],
        'sdss8': [
            'II/306/sdss8', '',
            'objID,umag,gmag,rmag,imag,zmag,e_umag,e_gmag,e_rmag,e_imag,e_zmag,gc'
        ]
    }

    a=os.popen('vizquery -mime=tsv  -site='+_site+' -source='+cat[catalogue][0]+\
                   ' -c.ra='+str(_ra)+' -c.dec='+str(_dec)+' -c.eq=J2000 -c.rm='+str(radius)+\
                   ' -c.geom=b -oc.form=h -sort=_RA*-c.eq -out.add=_RAJ2000,_DEJ2000 -out.max=10000 -out='+\
                   cat[catalogue][1]+' -out="'+cat[catalogue][2]+'"').read()
    print 'vizquery -mime=tsv  -site='+_site+' -source='+cat[catalogue][0]+\
                   ' -c.ra='+str(_ra)+' -c.dec='+str(_dec)+' -c.eq=J2000 -c.rm='+str(radius)+\
                   ' -c.geom=b -oc.form=h -sort=_RA*-c.eq -out.add=_RAJ2000,_DEJ2000 -out.max=10000 -out='+\
                   cat[catalogue][1]+' -out="'+cat[catalogue][2]+'"'
    aa = a.split('\n')
    bb = []
    for i in aa:
        if i and i[0] != '#': bb.append(i)
    _ra, _dec, _name, _mag = [], [], [], []
    for ii in bb[3:]:
        aa = ii.split('\t')

        rr, dd = deg2HMS(ra=re.sub(' ', ':', aa[0]),
                         dec=re.sub(' ', ':', aa[1]),
                         round=False)
        _ra.append(rr)
        _dec.append(dd)
        _name.append(aa[2])
    dictionary = {'ra': _ra, 'dec': _dec, 'id': _name}
    sss = string.split(cat[catalogue][2], ',')
    for ii in sss:
        dictionary[ii] = []
    for ii in bb[3:]:
        aa = ii.split('\t')
        for gg in range(0, len(sss)):
            if sss[gg] not in ['UCAC4', 'id']:
                try:
                    dictionary[sss[gg]].append(float(aa[2 + gg]))
                except:
                    dictionary[sss[gg]].append(float(9999))
            else:
                dictionary[sss[gg]].append(str(aa[2 + gg]))

    if catalogue in ['sdss7', 'sdss9', 'sdss8']:
        dictionary['u'] = dictionary['umag']
        dictionary['g'] = dictionary['gmag']
        dictionary['r'] = dictionary['rmag']
        dictionary['i'] = dictionary['imag']
        dictionary['z'] = dictionary['zmag']
        dictionary['uerr'] = dictionary['e_umag']
        dictionary['gerr'] = dictionary['e_gmag']
        dictionary['rerr'] = dictionary['e_rmag']
        dictionary['ierr'] = dictionary['e_imag']
        dictionary['zerr'] = dictionary['e_zmag']
        for key in dictionary.keys():
            if key != 'r':
                dictionary[key] = np.compress(
                    (np.array(dictionary['r']) < 19) &
                    (np.array(dictionary['r']) > 10), dictionary[key])
        dictionary['r'] = np.compress((np.array(dictionary['r']) < 19) &
                                      (np.array(dictionary['r']) > 10),
                                      dictionary['r'])

    elif catalogue == 'landolt':
        dictionary['B'] = np.array(dictionary['Vmag']) + np.array(
            dictionary['B-V'])
        dictionary['U'] = np.array(dictionary['B']) + np.array(
            dictionary['U-B'])
        dictionary['V'] = np.array(dictionary['Vmag'])
        dictionary['Verr'] = np.array(dictionary['e_Vmag'])
        dictionary['R'] = np.array(dictionary['Vmag']) - np.array(
            dictionary['V-R'])
        dictionary['I'] = np.array(dictionary['R']) - np.array(
            dictionary['R-I'])
        dictionary['id'] = np.array(dictionary['Star'])
    elif catalogue == 'ucac4':
        dictionary['B'] = np.array(dictionary['Bmag'])
        dictionary['V'] = np.array(dictionary['Vmag'])
        dictionary['g'] = np.array(dictionary['gmag'])
        dictionary['r'] = np.array(dictionary['rmag'])
        dictionary['i'] = np.array(dictionary['imag'])
        dictionary['Berr'] = np.array(dictionary['e_Bmag'], float) / 100.
        dictionary['Verr'] = np.array(dictionary['e_Vmag'], float) / 100.
        dictionary['gerr'] = np.array(dictionary['e_gmag'], float) / 100.
        dictionary['rerr'] = np.array(dictionary['e_rmag'], float) / 100.
        dictionary['ierr'] = np.array(dictionary['e_imag'], float) / 100.
        dictionary['id'] = np.array(dictionary['UCAC4'], str)
        # keep only stars in the magnitude range 10.5 < r < 22
        rcut = (np.array(dictionary['r']) < 22) & (np.array(dictionary['r']) > 10.5)
        for key in dictionary.keys():
            if key != 'r':
                dictionary[key] = np.compress(rcut, dictionary[key])
        dictionary['r'] = np.compress(rcut, dictionary['r'])
    elif catalogue == 'apass':
        dictionary['B'] = np.array(dictionary['Bmag'])
        dictionary['V'] = np.array(dictionary['Vmag'])
        dictionary['g'] = np.array(dictionary["g'mag"])
        dictionary['r'] = np.array(dictionary["r'mag"])
        dictionary['i'] = np.array(dictionary["i'mag"])
        dictionary['Berr'] = np.array(dictionary['e_Bmag'], float)
        dictionary['Verr'] = np.array(dictionary['e_Vmag'], float)
        dictionary['gerr'] = np.array(dictionary["e_g'mag"], float)
        dictionary['rerr'] = np.array(dictionary["e_r'mag"], float)
        dictionary['ierr'] = np.array(dictionary["e_i'mag"], float)
        # keep only stars in the magnitude range 10.5 < r < 22
        rcut = (np.array(dictionary['r']) < 22) & (np.array(dictionary['r']) > 10.5)
        for key in dictionary.keys():
            if key != 'r':
                dictionary[key] = np.compress(rcut, dictionary[key])
        dictionary['r'] = np.compress(rcut, dictionary['r'])
    return dictionary
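
A minimal usage sketch of the routine above, assuming the enclosing function
is defined as vizq(_ra, _dec, catalogue, radius); the actual name and
signature sit earlier in the source and may differ:

    stars = vizq(_ra=279.23, _dec=38.78, catalogue='apass', radius=20)
    # 'V' and 'Verr' are filled in by the apass post-processing above
    bright = np.compress(np.array(stars['V']) < 16, stars['V'])
    print(len(stars['ra']), 'stars returned;', len(bright), 'brighter than V = 16')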
Example #60
import numpy as np

# 'nc' is assumed to be an already-open netCDF diagnostics file (e.g. created
# with netCDF4.Dataset); obtype, obcode and level are set earlier in the source
header = nc.variables['header'][:]

# locate the pressure column in the obdata array
for icol, obstr in enumerate(nc.variables['obdata'].obinfo.split()):
    if obstr.startswith('P'):
        break
press = nc.variables['obdata'][:, icol]

# locate the column for the requested observation type
for icol, obstr in enumerate(nc.variables['obdata'].obinfo.split()):
    if obstr.startswith(obtype):
        break
obs = nc.variables['obdata'][:, icol]
bufrerr = nc.variables['oberr'][:, icol]

# locate the matching column in the GSI diagnostics
for icol, obstr in enumerate(nc.variables['gsigesdata'].diaginfo.split()):
    if obstr.startswith(obtype):
        break

gsiges = nc.variables['gsigesdata'][:, icol]   # first-guess (background) values
gsianl = nc.variables['gsianldata'][:, icol]   # analysis values
enssprd = nc.variables['gsi_ensstd'][:, icol]  # ensemble standard deviation
gsierr = nc.variables['gsierr'][:, icol]       # observation error used by GSI
used = (nc.variables['gsiqc'][:, icol]).astype('bool')  # nonzero qc flag = used by GSI

# find indices corresponding to specified obcode, pressure level
# find indices corresponding to the specified obcode and pressure level
idx = np.argwhere(
    np.logical_and(header[:, 4] == obcode,
                   np.abs(level - press) <= 1.0)).squeeze()
idx = np.compress(used[idx], idx)  # only select obs used by GSI
print('count = ', len(idx))
print('RMS ges departure', np.sqrt(np.mean((obs - gsiges)[idx]**2)))
print('expected ges departure', np.sqrt(np.mean((enssprd**2 + bufrerr**2)[idx])))
print('RMS anl departure', np.sqrt(np.mean((obs - gsianl)[idx]**2)))
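
A short follow-up sketch (an addition, not part of the original script): the
ratio of the actual to the expected first-guess departure is a common
consistency check in ensemble data assimilation, with values near 1 indicating
that the ensemble spread plus observation error is well calibrated.

# consistency ratio computed from the quantities already printed above
rms_ges = np.sqrt(np.mean((obs - gsiges)[idx]**2))
expected = np.sqrt(np.mean((enssprd**2 + bufrerr**2)[idx]))
print('spread/error consistency ratio', rms_ges / expected)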