def _findRobots(self):
    """ Finds the robots among the edges found """
    ## for each right edge find the next closest left edge. This forms an edge pair that could be a robot
    self.Robots = list()
    if len(self.RightEdges) == 0 or len(self.LeftEdges) == 0:
        return

    for rightedge in self.RightEdges:
        leftedge = self.LeftEdges[0]
        i = 1
        while leftedge < rightedge:
            if i >= len(self.LeftEdges):
                break
            leftedge = self.LeftEdges[i]
            i = i + 1

        ## now calculate the distance between the two edges
        distance = self.__calculateDistanceBetweenEdges(leftedge, rightedge)

        if distance > self.MINIMUM_NAO_WIDTH and distance < self.MAXIMUM_NAO_WIDTH:
            x = self.CartesianData[0, rightedge:leftedge + 1]
            y = self.CartesianData[1, rightedge:leftedge + 1]
            r = self.PolarData[0, rightedge:leftedge + 1]

            c = numpy.less(r, 409.5)
            x = numpy.compress(c, x)
            y = numpy.compress(c, y)

            robotx = self.__averageObjectDistance(x)
            roboty = self.__averageObjectDistance(y)
            c = numpy.logical_and(numpy.less(numpy.fabs(x - robotx), self.MAXIMUM_NAO_WIDTH),
                                  numpy.less(numpy.fabs(y - roboty), self.MAXIMUM_NAO_WIDTH))
            x = numpy.compress(c, x)
            y = numpy.compress(c, y)

            robotr = math.sqrt(robotx**2 + roboty**2)
            robotbearing = math.atan2(roboty, robotx)
            self.Robots.append(Robot(robotx, roboty, robotr, robotbearing, x, y))
def utest( self, score ):
    """
    Gives the Mann-Whitney U test probability that the score is random.

    See: Mason & Graham (2002) Areas beneath the relative operating
    characteristics (ROC) and relative operating levels (ROL) curves:
    Statistical significance and interpretation

    Note (1): P-values below ~1e-16 are reported as 0.0.
    See zprob() in Biskit.Statistics.stats!

    Note (2): the P-value does not distinguish between positive and
    negative deviations from random -- a ROC area of 0.1 will get the
    same P-value as a ROC area of 0.9.

    @param score: the score predicted for each item
    @type score: [ float ]

    @return: 1-tailed P-value
    @rtype: float
    """
    sample1 = N.compress( self.positives, score )
    sample1 = sample1[-1::-1]  # invert order

    sample2 = N.compress( N.logical_not( self.positives ), score )
    sample2 = sample2[-1::-1]  # invert order

    sample1 = sample1.tolist()
    sample2 = sample2.tolist()

    p = stats.mannwhitneyu( sample1, sample2 )
    return p[1]
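# A standalone, hedged sketch of the same test outside the class above: it
# assumes scipy >= 0.17 (for the `alternative` keyword) and illustrative inputs
# `positives` (boolean mask) and `score`; none of these names come from the
# original module. Note the original returns a 1-tailed value from the legacy
# two-argument API; this sketch asks for the two-sided p-value explicitly.
import numpy as np
from scipy import stats

def utest_sketch(positives, score):
    """Two-sided Mann-Whitney U p-value that `score` separates positives from negatives."""
    positives = np.asarray(positives, dtype=bool)
    score = np.asarray(score, dtype=float)
    sample1 = np.compress(positives, score)                  # scores of positive items
    sample2 = np.compress(np.logical_not(positives), score)  # scores of negative items
    return stats.mannwhitneyu(sample1, sample2, alternative='two-sided').pvalue

# e.g. utest_sketch([1, 1, 1, 0, 0, 0], [0.9, 0.8, 0.7, 0.2, 0.1, 0.05])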
def whiskers_and_fliers(x, q1, q3, transformout=None):
    wnf = {}
    if transformout is None:
        transformout = lambda x: x

    iqr = q3 - q1
    # get low extreme
    loval = q1 - (1.5 * iqr)
    whislo = np.compress(x >= loval, x)
    if len(whislo) == 0 or np.min(whislo) > q1:
        whislo = q1
    else:
        whislo = np.min(whislo)

    # get high extreme
    hival = q3 + (1.5 * iqr)
    whishi = np.compress(x <= hival, x)
    if len(whishi) == 0 or np.max(whishi) < q3:
        whishi = q3
    else:
        whishi = np.max(whishi)

    wnf['fliers'] = np.hstack([
        transformout(np.compress(x < whislo, x)),
        transformout(np.compress(x > whishi, x))
    ])
    wnf['whishi'] = transformout(whishi)
    wnf['whislo'] = transformout(whislo)
    return wnf
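# Hedged usage sketch for the helper above: q1/q3 are assumed to be the 25th and
# 75th percentiles of the same array passed as `x`; the data values are
# illustrative only.
import numpy as np

data = np.concatenate([np.random.normal(10.0, 1.0, 200), [25.0, -3.0]])  # two obvious fliers
q1, q3 = np.percentile(data, [25, 75])
wnf = whiskers_and_fliers(data, q1, q3)
# wnf['whislo'] / wnf['whishi'] are the whisker ends, wnf['fliers'] the points beyond them.
# Pass e.g. transformout=lambda v: 10**v if x, q1, q3 were computed on log10-transformed data.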
def doit(input_file, output_file, regularization, wants_normalization): # Read the entire file. data= tuple(tuple(map(float, line.split(','))) for line in input_file) if len(data) == 0: print("no data", file=sys.stderr) return # Create X and Y indices. Assume the last column contains the output and # the rest contain the inputs. y_index= len(data[0]) - 1 x_indices= tuple(range(y_index)) # Create and print the model parameters, normalizing the data if requested. data= np.array(data) x= np.compress(as_bools(x_indices), data, 1) mu= list(it.repeat(0.0, x.shape[1])) sigma= list(it.repeat(1.0, x.shape[1])) if wants_normalization: for i in range(x.shape[1]): mu[i]= np.mean(x[:,i]) sigma[i]= np.std(x[:,i]) if sigma[i] == 0.0: sigma[i]= 1.0 x[:,i]= (x[:,i] - mu[i]) / sigma[i] y= np.compress(as_bools(y_index), data, 1).squeeze() model= MinimizedModel(x, y, regularization, mu, sigma) print(model, file=output_file)
def setFlaggedImageRange(self): (nx,ny) = self.raw_image.shape num_elements = nx * ny if self._flags_array is None: if not self._nan_flags_array is None: flags_array = self._nan_flags_array.copy() else: flags_array = numpy.zeros((nx,ny),int); else: flags_array = self._flags_array.copy() if not self._nan_flags_array is None: flags_array = flags_array + self._nan_flags_array flattened_flags = numpy.reshape(flags_array,(num_elements,)) if self.raw_image.dtype == numpy.complex64 or self.raw_image.dtype == numpy.complex128: real_array = self.raw_image.real imag_array = self.raw_image.imag flattened_real_array = numpy.reshape(real_array.copy(),(num_elements,)) flattened_imag_array = numpy.reshape(imag_array.copy(),(num_elements,)) real_flagged_array = numpy.compress(flattened_flags == 0, flattened_real_array) imag_flagged_array = numpy.compress(flattened_flags == 0, flattened_imag_array) flagged_image = numpy.zeros(shape=real_flagged_array.shape,dtype=self.raw_image.dtype) flagged_image.real = real_flagged_array flagged_image.imag = imag_flagged_array else: flattened_array = numpy.reshape(self.raw_image.copy(),(num_elements,)) flagged_image = numpy.compress(flattened_flags == 0, flattened_array) self.setImageRange(flagged_image)
def fit_gauss_to_hist(binheights, binedges, binerrors,
                      fitmin=0, fitmax=None,
                      p0=None,  # guesses for norm, mu, sigma
                      fitcolor="r"):
    left_binedges = binedges[:-1]
    if fitmax is None:
        fitmax = np.max(binedges)

    # cut data to values needed for fitting
    cut_mask = (left_binedges > fitmin) * (left_binedges < fitmax)
    fitx = np.compress(cut_mask, left_binedges)
    fity = np.compress(cut_mask, binheights)
    cut_binerrors = np.compress(cut_mask, binerrors)

    # p0 = [200., meanguess, 10.]
    popt, pcov = scipy.optimize.curve_fit(gauss, fitx, fity,
                                          sigma=cut_binerrors,
                                          absolute_sigma=True, p0=p0)
    perr = np.sqrt(np.diag(pcov))

    # draw fitfunction
    xbase = np.linspace(fitmin, fitmax, 1000)
    plt.plot(xbase, gauss(xbase, popt[0], popt[1], popt[2]),
             color=fitcolor, linewidth=2.)

    print("optimized norm, mu, sigma:\n", popt)
    print("corresponding errors\n", perr)
    print("corresponding covariance matrix:\n", pcov)
    return popt, pcov
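# The `gauss` model used by curve_fit above is not part of the snippet; a
# definition consistent with the (norm, mu, sigma) parameter order assumed by
# p0 would look like this (an assumption, not the original code):
import numpy as np

def gauss(x, norm, mu, sigma):
    """Unnormalised Gaussian: norm * exp(-(x - mu)^2 / (2 sigma^2))."""
    return norm * np.exp(-0.5 * ((x - mu) / sigma) ** 2)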
def _addChildren(self, parent_node_num, I, cur_depth, right_mask, left_mask, y): """Modifies self.nodes_dict, self.stack """ # do the right branch r_tmp = numpy.compress(right_mask, y) if (r_tmp.shape[0] > 0): # then there is a reason to add a right child r_node_num = self.num_nodes r_child = TreeNode() r_child.parent = parent_node_num r_child.constval = numpy.average(r_tmp) self.nodes_dict[parent_node_num].Rchild = r_node_num self.nodes_dict[r_node_num] = r_child self.stack.append( self.StackEntry(r_node_num, cur_depth+1,\ numpy.compress(right_mask, I))) self.num_nodes += 1 # do the left branch l_tmp = numpy.compress(left_mask, y) if (l_tmp.shape[0] > 0): l_node_num = self.num_nodes l_child = TreeNode() l_child.parent = parent_node_num l_child.constval = numpy.average(l_tmp) self.nodes_dict[parent_node_num].Lchild = l_node_num self.nodes_dict[l_node_num] = l_child self.stack.append( self.StackEntry(l_node_num, cur_depth+1,\ numpy.compress(left_mask, I))) self.num_nodes += 1
def getMaxPoints(arr): # [TODO] Work out for RGB rather than array, and maybe we don't need the filter, but hopefully speeds it up. # Reference http://scipy-cookbook.readthedocs.io/items/FiltFilt.html arra = filtfilt(b,a,arr) maxp = maxpoints(arra, order=(len(arra)/20), mode='wrap') minp = minpoints(arra, order=(len(arra)/20), mode='wrap') points = [] for i in range(3): mas = np.equal(np.greater_equal(maxp,(i*(len(arra)/3))), np.less_equal(maxp,((i+1)*len(arra)/3))) k = np.compress(mas[0], maxp) if len(k)==0: continue points.append(sum(k)/len(k)) if len(points) == 1: return points, [] points = np.compress(np.greater_equal(arra[points],(max(arra)-min(arra))*0.40 + min(arra)),points) rifts = [] for i in range(len(points)-1): mas = np.equal(np.greater_equal(minp, points[i]),np.less_equal(minp,points[i+1])) k = np.compress(mas[0], minp) rifts.append(k[arra[k].argmin()]) return points, rifts
def calculate_switch_length(inheritance, positions, ignore_size=0, index_only=False):
    assert inheritance.shape[0] == positions.size

    # only 1s and 2s are relevant: keep rows with at least one informative call
    keep = np.any(inheritance < 3, axis=1)
    inh_copy = np.compress(keep, inheritance.copy(), axis=0)

    forgiven = [forgive(col, ignore_size) for col in inh_copy.T]
    switches = [derive_position_switch_array(np.compress(fgv, col))
                for col, fgv in zip(inh_copy.T, forgiven)]

    filtered_pos = None
    if index_only:
        mean_length = [np.mean(s) for s in switches]
        medi_length = [np.median(s) for s in switches]
        maxi_length = [np.max(s) for s in switches]  # was np.median, presumably a copy-paste slip
    else:
        assert inheritance.shape[0] == positions.shape[0]
        pos = np.compress(keep, positions)
        filtered_pos = [np.insert(np.take(np.compress(fgv, pos), sw.cumsum() - 1), 0, pos[0])
                        for fgv, sw in zip(forgiven, switches)]
        mean_length = np.array([np.mean(np.diff(f)) for f in filtered_pos])
        medi_length = np.array([np.median(np.diff(f)) for f in filtered_pos])
        maxi_length = np.array([np.max(np.diff(f)) for f in filtered_pos])

    return mean_length, medi_length, maxi_length, filtered_pos
def _locate(self, x):
    '''
    Given a possible set of color data values, return the ones
    within range, together with their corresponding colorbar data
    coordinates.
    '''
    if isinstance(self.norm, (colors.NoNorm, colors.BoundaryNorm)):
        b = self._boundaries
        xn = x
        xout = x
    else:
        # Do calculations using normalized coordinates so
        # as to make the interpolation more accurate.
        b = self.norm(self._boundaries, clip=False).filled()
        # We do our own clipping so that we can allow a tiny
        # bit of slop in the end point ticks to allow for
        # floating point errors.
        xn = self.norm(x, clip=False).filled()
        in_cond = (xn > -0.001) & (xn < 1.001)
        xn = np.compress(in_cond, xn)
        xout = np.compress(in_cond, x)
    # The rest is linear interpolation with clipping.
    y = self._y
    N = len(b)
    ii = np.minimum(np.searchsorted(b, xn), N - 1)
    i0 = np.maximum(ii - 1, 0)
    #db = b[ii] - b[i0]
    db = np.take(b, ii) - np.take(b, i0)
    db = np.where(i0 == ii, 1.0, db)
    #dy = y[ii] - y[i0]
    dy = np.take(y, ii) - np.take(y, i0)
    z = np.take(y, i0) + (xn - np.take(b, i0)) * dy / db
    return xout, z
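# Standalone sketch of the compress + searchsorted interpolation pattern above:
# keep only the in-range values, then linearly interpolate them onto a lookup
# table. The boundary and input arrays here are made up for illustration.
import numpy as np

b = np.linspace(0.0, 1.0, 6)       # boundaries in normalized units
y = np.linspace(0.0, 1.0, 6)       # colorbar coordinates at those boundaries
x = np.array([-0.5, 0.1, 0.37, 0.99, 1.2])

in_cond = (x > -0.001) & (x < 1.001)
xn = np.compress(in_cond, x)                     # drop out-of-range inputs
ii = np.minimum(np.searchsorted(b, xn), len(b) - 1)
i0 = np.maximum(ii - 1, 0)
db = np.where(i0 == ii, 1.0, b[ii] - b[i0])      # avoid division by zero at the ends
z = y[i0] + (xn - b[i0]) * (y[ii] - y[i0]) / db  # interpolated colorbar coordinates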
def stochasticPartition(self, data, partition):
    '''Split the data stochastically according to soft partition'''
    #sample = numpy.random.rand(partition.shape[0]) < partition
    sample = partition
    ldata = numpy.compress(1 - sample, data, axis=0)
    rdata = numpy.compress(sample, data, axis=0)
    return (numpy.asarray(ldata), numpy.asarray(rdata))
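# Minimal illustration of the complementary-mask split used above, with a hard
# 0/1 partition (the commented-out line in the method would instead sample the
# soft partition); the arrays are made up.
import numpy as np

data = np.arange(10).reshape(5, 2)
sample = np.array([0, 1, 0, 1, 1])
ldata = np.compress(1 - sample, data, axis=0)  # rows where sample == 0
rdata = np.compress(sample, data, axis=0)      # rows where sample == 1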
def create_projection_as_numeric_array_3D(self, attr_indices, **settings_dict): valid_data = settings_dict.get("valid_data") class_list = settings_dict.get("class_list") jitter_size = settings_dict.get("jitter_size", 0.0) if valid_data == None: valid_data = self.get_valid_list(attr_indices) if sum(valid_data) == 0: return None if class_list == None and self.data_has_class: class_list = self.original_data[self.data_class_index] xarray = self.no_jittering_scaled_data[attr_indices[0]] yarray = self.no_jittering_scaled_data[attr_indices[1]] zarray = self.no_jittering_scaled_data[attr_indices[2]] if jitter_size > 0.0: xarray += (np.random.random(len(xarray))-0.5)*jitter_size yarray += (np.random.random(len(yarray))-0.5)*jitter_size zarray += (np.random.random(len(zarray))-0.5)*jitter_size if class_list != None: data = np.compress(valid_data, np.array((xarray, yarray, zarray, class_list)), axis = 1) else: data = np.compress(valid_data, np.array((xarray, yarray, zarray)), axis = 1) data = np.transpose(data) return data
def _render(self, gc, pts): with gc: gc.clip_to_rect(self.x, self.y, self.width, self.height) if not self.index: return name = self.selection_metadata_name md = self.index.metadata if name in md and md[name] is not None and len(md[name]) > 0: # FIXME: when will we ever encounter multiple masks in the list? sel_mask = md[name][0] sel_pts = np.compress(sel_mask, pts, axis=0) unsel_pts = np.compress(~sel_mask, pts, axis=0) color = list(self.color_) color[3] *= self.unselected_alpha outline_color = list(self.outline_color_) outline_color[3] *= self.unselected_alpha if unsel_pts.size > 0: self.render_markers_func(gc, unsel_pts, self.marker, self.marker_size, tuple(color), self.unselected_line_width, tuple(outline_color), self.custom_symbol) if sel_pts.size > 0: self.render_markers_func(gc, sel_pts, self.marker, self.marker_size, self.selected_color_, self.line_width, self.outline_color_, self.custom_symbol) else: self.render_markers_func(gc, pts, self.marker, self.marker_size, self.color_, self.line_width, self.outline_color_, self.custom_symbol)
def lcylimits(self):
    """Determine the y-limits depending on which plots are selected."""
    mask = (self.dtime > self.lcx1) * (self.dtime < self.lcx2) * (self.goodframes > 0)

    if self.ratiovar.get():
        rarr = np.compress(mask, self.ratio)
        y1 = rarr.min()
        y2 = rarr.max()
        ylabel = 'Star1/Star2'
    else:
        if self.star2var.get() and self.star1var.get():
            cfarr = np.compress(mask, self.cflux).max()
            tfarr = np.compress(mask, self.tflux).max()
            y1 = 0
            y2 = cfarr < tfarr and tfarr or cfarr  # i.e. the larger of the two maxima
            ylabel = 'Star Flux'
        elif self.star2var.get():
            cfarr = np.compress(mask, self.cflux)
            y1 = 0
            y2 = cfarr.max()
            ylabel = 'Star2 Flux'
        else:
            tfarr = np.compress(mask, self.tflux)
            y1 = 0
            y2 = tfarr.max()
            ylabel = 'Star1 Flux'
    return y1, y2, ylabel
def calculate_realexptime(id_arr, utc_arr, dsec_arr, diff_arr, req_texp, utc_list):
    """Calculates the real exposure time.
    This makes the following assumptions:
    #. That the measurement after the turn of the second is a fiducial
    #. That there is an integer number of frames between each fiducial exposure
    #. We then set up a metric which is Y=np.sum(i-int(i)) where i=dt/t_exp
    #. Then the minimum of Y is found between the requested exposure time
       and the median time difference
    #. And the best exposure time is the time at that minimum

    returns median exposure time and real exposure time
    """
    t_exp = 0

    # calculate the median time
    try:
        t_wrong = np.median(diff_arr)
    except:
        raise SaltError('Unable to calculate median time difference')

    # Compress the arrays to find those closest to the second mark
    mask = (dsec_arr < t_wrong) * (diff_arr > 0)
    t = np.compress(mask, utc_arr)
    s = np.compress(mask, dsec_arr)
    id = np.compress(mask, id_arr)

    # Now set up the components in the equation
    try:
        t_start = t[0]
        dt = t[1:] - t[0]
    except Exception as e:
        msg = 'Unable to set up necessary arrays because %s' % e
        raise SaltError(msg)
def myzpk2tf(self, z, p, k):
    z = np.atleast_1d(z)
    k = np.atleast_1d(k)
    if len(z.shape) > 1:
        temp = np.poly(z[0])
        b = np.zeros((z.shape[0], z.shape[1] + 1), temp.dtype.char)
        if len(k) == 1:
            k = [k[0]] * z.shape[0]
        for i in range(z.shape[0]):
            b[i] = k[i] * poly(z[i])
    else:
        b = k * np.poly(z)
    a = np.atleast_1d(np.poly(p))

    # Use real output if possible. Copied from numpy.poly, since
    # we can't depend on a specific version of numpy.
    if issubclass(b.dtype.type, np.complexfloating):
        # if complex roots are all complex conjugates, the roots are real.
        roots = np.asarray(z, complex)
        pos_roots = np.compress(roots.imag > 0, roots)
        neg_roots = np.conjugate(np.compress(roots.imag < 0, roots))
        if len(pos_roots) == len(neg_roots):
            if np.all(np.sort_complex(neg_roots) == np.sort_complex(pos_roots)):
                b = b.real.copy()

    if issubclass(a.dtype.type, np.complexfloating):
        # if complex roots are all complex conjugates, the roots are real.
        roots = np.asarray(p, complex)
        pos_roots = np.compress(roots.imag > 0, roots)
        neg_roots = np.conjugate(np.compress(roots.imag < 0, roots))
        if len(pos_roots) == len(neg_roots):
            if np.all(np.sort_complex(neg_roots) == np.sort_complex(pos_roots)):
                a = a.real.copy()

    return b, a
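# Hedged sanity check for the real-coefficient recovery above: a conjugate zero
# pair should give a purely real numerator, matching scipy.signal.zpk2tf. The
# values are illustrative; `myzpk2tf` itself is a method, so this only mirrors
# the expected result.
import numpy as np
from scipy import signal

z = np.array([1j, -1j])   # conjugate pair -> real polynomial z^2 + 1
p = np.array([-0.5])
k = 2.0
b_ref, a_ref = signal.zpk2tf(z, p, k)
# b_ref ~ [2., 0., 2.], a_ref ~ [1., 0.5]; both arrays are real-valued.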
def contributions(Ilength, Olength, scale, kernel,kernel_width): # Antialiasing for downsizing if scale < 1: h = lambda x: kernel(x,scale) kernel_width = kernel_width/scale else: h = kernel # output space coordinate x = np.arange(Olength, dtype = float) x.shape += (1,) # input space coord so that 0.5 in Out ~ 0.5 in In, and 0.5+scale in Out ~ # 0.5 + 1 in In u = x/scale + 0.5*(-1+1.0/scale) left = np.floor(u-kernel_width/2) P = math.ceil(kernel_width) + 2 indices = left + np.arange(P) weights = h(u - indices) norm = np.sum(weights,axis=1) norm.shape += (1,) weights = weights/norm indices = np.minimum(np.maximum(0,indices),Ilength-1) indices = np.array(indices,dtype = int) kill = np.ma.any(weights,0) weights = np.compress(kill,weights,1) indices = np.compress(kill,indices,1) return (weights,indices)
def test8():
    global L0, N
    L = deepcopy(L0)
    rho = zeros(N, 'double')
    rho[random.sample(range(N), N // 2)] = 1
    print(rho)
    LI = linalg.inv(L)
    #print L
    #print LI
    #I = numpy.dot(L,LI)
    #I[abs(I)<0.001] = 0
    #print I
    t = numpy.greater(rho, 0)
    X = numpy.zeros((N, N))
    for i in range(N):
        X[0][i] = i
    print(X)
    LIC = numpy.compress(t, LI, 1)
    print(LIC)
    LIC = numpy.compress(t, LIC, 0)
    print(LIC)
    LICI = linalg.inv(LIC)
    print(LICI)
def test_np_ufuncs(self):
    z = self.create_array(shape=(100, 100), chunks=(10, 10))
    a = np.arange(10000).reshape(100, 100)
    z[:] = a

    eq(np.sum(a), np.sum(z))
    assert_array_equal(np.sum(a, axis=0), np.sum(z, axis=0))
    eq(np.mean(a), np.mean(z))
    assert_array_equal(np.mean(a, axis=1), np.mean(z, axis=1))
    condition = np.random.randint(0, 2, size=100, dtype=bool)
    assert_array_equal(np.compress(condition, a, axis=0),
                       np.compress(condition, z, axis=0))
    indices = np.random.choice(100, size=50, replace=True)
    assert_array_equal(np.take(a, indices, axis=1),
                       np.take(z, indices, axis=1))

    # use zarr array as indices or condition
    zc = self.create_array(shape=condition.shape, dtype=condition.dtype,
                           chunks=10, filters=None)
    zc[:] = condition
    assert_array_equal(np.compress(condition, a, axis=0),
                       np.compress(zc, a, axis=0))
    zi = self.create_array(shape=indices.shape, dtype=indices.dtype,
                           chunks=10, filters=None)
    zi[:] = indices
    # this triggers __array__() call with dtype argument
    assert_array_equal(np.take(a, indices, axis=1),
                       np.take(a, zi, axis=1))
def unpack_data(path, delimiter, filtr=False, split_column=-1): """Measurements and errors are assumed to be alternating. The last pair of columns corresponds to the dependent variable while the preceeding are independent. If filtr is True, values larger than the error are removed. If split_column is given, the data is split into lumps with a column value in that column, e.g if split_column=(n-1) [n.b we count from 0] and the nth column contains trial number, chemical type etc. this value will be used to categorise the rest of the data and the other procedures will run sequentially on each category, as if they were in different files.""" raw = np.loadtxt(path, delimiter=delimiter, skiprows=1) data_name = os.path.splitext(os.path.basename(path))[0] if split_column != -1: raws = split_file(raw, split_column, data_name) else: # Needed to generalise following iterative step. raws = [(data_name, raw)] for (name, raw) in raws: meas = raw[:, ::2].transpose() err = raw[:, 1::2].transpose() if filtr: test = (abs(meas) >= err).prod(axis=0) meas = np.compress(test, meas, axis=1) err = np.compress(test, err, axis=1) if meas.shape[0] == 2: A = (meas[:-1].ravel(), err[:-1].ravel()) yield name, (A, (meas[-1], err[-1])) else: yield name, ((meas[:-1], err[:-1]), (meas[-1], err[-1]))
def jiu(self):
    '''
    Joint Information Uncertainty coefficient, based on entropy, for discrete values.
    The coefficient ranges over [0, 1]:
        0 - no connection
        1 - full connection
    @param X First raster's array
    @param Y Second raster's array
    '''
    #T, sum_r, sum_s, total, r, s = compute_table(X, Y)
    table = self.getCrosstable()

    T = table.getProbtable()        # Pij = Tij / total
    sum_rows = table.getProbRows()  # Pi. = Ti. / total  i=[0,(r-1)]
    sum_cols = table.getProbCols()  # P.j = T.j / total  j=[0,(s-1)]

    # to calculate the entropy we take the logarithm;
    # the logarithm of zero does not exist, so we must mask zero values
    sum_rows = np.compress(sum_rows != 0, sum_rows)
    sum_cols = np.compress(sum_cols != 0, sum_cols)

    # Compute the entropy coeff. of the two rasters
    H_x = -np.sum(sum_rows * np.log(sum_rows))
    H_y = -np.sum(sum_cols * np.log(sum_cols))

    # Compute the joint entropy coeff.
    T = np.ma.array(T, mask=(T == 0))
    T = np.ma.compressed(T)
    H_xy = -np.sum(T * np.log(T))

    # Compute the Joint Information Uncertainty
    U = 2.0 * ((H_x + H_y - H_xy) / (H_x + H_y))

    return U
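# Standalone sketch of the same quantity computed directly from a joint
# probability table `P` (an assumed 2-D array of cell probabilities summing
# to 1, standing in for the crosstable object used above).
import numpy as np

def jiu_from_probtable(P):
    """Joint Information Uncertainty U = 2*(Hx + Hy - Hxy)/(Hx + Hy)."""
    P = np.asarray(P, dtype=float)
    px = P.sum(axis=1)
    py = P.sum(axis=0)
    px = np.compress(px != 0, px)                 # mask zero marginals before log
    py = np.compress(py != 0, py)
    pxy = np.compress(P.ravel() != 0, P.ravel())  # mask zero cells before log
    H_x = -np.sum(px * np.log(px))
    H_y = -np.sum(py * np.log(py))
    H_xy = -np.sum(pxy * np.log(pxy))
    return 2.0 * (H_x + H_y - H_xy) / (H_x + H_y)

# Identical categories give U == 1, independent ones give U == 0:
# jiu_from_probtable(np.diag([0.5, 0.5]))   -> 1.0
# jiu_from_probtable(np.full((2, 2), 0.25)) -> 0.0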
def estimateState(self):
    """ Updates the estimate of the state """
    best = numpy.argmax(self.Weights)
    beststate = self.States[best, :]
    #print "Best State:", beststate
    cond = (numpy.sum(numpy.fabs(self.States - beststate), axis=1) < 1)
    beststates = numpy.compress(cond, self.States, axis=0)
    bestweights = numpy.compress(cond, self.Weights)
    #print "States", self.States
    #print "States within window:", cond
    #print "States close to best", len(beststates), beststates
    #print "Weights close to best", bestweights
    #print "Product:", (bestweights*beststates.T).T
    bestweights /= numpy.sum(bestweights)
    self.State = numpy.sum((bestweights * beststates.T).T, axis=0)
    #print "Estimate:", self.State
    #print numpy.fabs(numpy.arctan2(self.State[Localisation.YDOT], self.State[Localisation.XDOT]) - self.State[Localisation.THETA]) - self.__controlToVelocityVector()
    if numpy.isnan(self.State[0]):
        print("FAIL")
    self.__updateAttributesFromState()
def roiEnergyAnalysis(data): '''Troubleshooting function, compares the observed sum energy in an ROI to the genEnergy''' genEnergies = [] sumEnergies = [] pbar = progressbar("Processing event &count&:", len(data)+1) pbar.start() count = 0 for event in data: genEnergy = event[2]['getpt'] * np.cosh(event[2]['geneta']) for i in range(len(genEnergy)): clustersIndices = np.compress(event[1]['ROI'] == i, event[1]['clusterID'], axis=0) #|Only take clusters corresponding to right ROI clusterEnergies = [] for clusterID in clustersIndices: #|Only take hits corresponding to correct cluster hits = np.compress(event[0]['clusterID'] == clusterID, event[0], axis=0) energies = hits['en'] for energy in energies: clusterEnergies.append(energy) #|Add the energy to the cluster energies ROIEnergy = np.sum(clusterEnergies) # Append to original lists genEnergies.append(genEnergy[i]) sumEnergies.append(ROIEnergy) pbar.update(count) count += 1 pbar.finish() # np.save("sums.npy", sumEnergies) # np.save("gens.npy", genEnergies) # Plot it Plotter.sumEnergyVsGenEnergy(sumEnergies, genEnergies)
def gammaGunFilter(data, quiet=True):
    '''Filters the gamma gun data set to clean it up, removing some of the junk.'''
    if not quiet:
        print("Filtering")
    data = np.compress([len(event[2]) >= 2 for event in data],
                       data, axis=0)  #|Get at least two ROI's
    data = np.compress([np.max(event[2]['eta']) * np.min(event[2]['eta']) < 0 for event in data],
                       data, axis=0)  #|Require an eta separation of opposite endcaps to prevent high errors
    return data
def make_lcdata(self):
    # cut the data
    mask = (self.goodframes > 0)
    self.tarr = np.compress(mask, self.dtime)
    self.rarr = np.compress(mask, self.ratio)
    self.tfarr = np.compress(mask, self.tflux)
    self.cfarr = np.compress(mask, self.cflux)
def get_posterior_sample(self, n):
    """
    Return a sample of the posterior distribution.
    Uses the SIR algorithm.

    :Parameters:
        - `n`: Sample size.
    """
    if self.posterior.any():  # Use last posterior as prior
        k = stats.kde.gaussian_kde(self.posterior)
        s = k.resample(n)
    else:
        s = self.get_prior_sample(n)
    if self.data is not None:
        m = self.rang[0]
        M = self.rang[1]
        step = self.res
        supp = arange(m, M, step)  # support
        s = compress(less(s.ravel(), M) & greater(s.ravel(), m), s)  # removing out-of-range samples
        d = stats.uniform.rvs(loc=0, scale=1, size=len(s))  # Uniform 0-1 samples
        w = self.pdf(supp) * self.likelihood
        w = w / sum(w)  # normalizing weights
        sx = searchsorted(supp, s)
        w = w[sx - 1]  # searchsorted returns 1-based bin list
        post = compress(d < w, s)
        self.posterior = post
        return post
    else:
        return array([])
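# Simplified, hedged sketch of the resampling idea above: keep in-range prior
# samples, then accept each with probability proportional to its normalised
# weight. The prior and likelihood here are illustrative stand-ins, not the
# class's own pdf/likelihood.
import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
s = rng.normal(0.0, 2.0, 5000)                 # prior samples
s = np.compress((s > -5.0) & (s < 5.0), s)     # remove out-of-range samples
w = stats.norm.pdf(s, loc=1.0, scale=0.5)      # unnormalised importance weights
w = w / w.max()                                # scale into [0, 1] acceptance probabilities
d = rng.uniform(size=s.size)                   # uniform 0-1 samples
posterior = np.compress(d < w, s)              # accepted samples approximate the posterior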
def calcZ01andZ10(Y, MPS): try: U, S, V = spla.svd(Y, full_matrices=True) except spla.LinAlgError as err: if 'empty' in err.message: row, col = Y.shape Z01 = np.array([], dtype=Y.dtype).reshape(row, 0) Z10 = np.array([], dtype=Y.dtype).reshape(0, col) print "Empty", Z01.shape, Z10.shape else: print >> sys.stderr, "calcZ01andZ10: Error", I, err raise else: print "S", S, "\nU", U, "\nV", V __, chi, __ = MPS.shape mask = (S > expS) #np.array([True] * S.shape[0]) mask[xiTilde - chi:] = False U = np.compress(mask, U, 1) S = np.compress(mask, S, 0) V = np.compress(mask, V, 0) Ssq = np.diag(np.sqrt(S)) Z01 = np.dot(U, Ssq) Z10 = np.dot(Ssq, V) print "Fill ", U.shape, V.shape, "mask", mask eps = np.linalg.norm(np.dot(Z01, Z10)) print "eps", I, eps print "Z01", Z01.shape, "\n", Z01, "\nZ10", Z10.shape, "\n", Z10 return Z01, Z10
def ref(m): if m.shape[0]==1: for i in range(1,m.shape[1]): m[0,i]=m.item(0,i)/m.item(0,0) m[0,0]=1 return m m=np.copy(trim(m)) if m.item(0,0)==0: for i in range(1,m.shape[0]): if m.item(i,0)!=0: m=np.copy(swap(m,0,i)) break for i in range(1,m.shape[1]): m[0,i]=m.item(0,i)/m.item(0,0) m[0,0]=1 for j in range(1,m.shape[0]): for k in range(1,m.shape[1]): m[j,k]=m.item(j,k)-(m.item(j,0))*(m.item(0,k)) m[j,0]=0 a=[False] b=[False] for i in range(1,m.shape[0]): a=np.append(a,True) for i in range(1,m.shape[1]): b=np.append(b,True) n=np.compress(a,np.compress(b,m,axis=1),axis=0) n=np.copy(rref2(n)) for i in range(1,m.shape[0]): for j in range(1,m.shape[1]): m[i,j]=n.item(i-1,j-1) return m
def refine(self, gr, tol=0.05): tx, ty, tz = gr.translation wvln = float(self.pars.get("wavelength")) if hasattr(gr, "pks") and gr.pks is not None: #print "Got pks" pks = gr.pks XS = self.XS[:, pks] XB = self.XB[:, pks] else: #print "New pks" XS = self.XS XB = self.XB pks = np.arange(len(XS[0])) ret = d_XSXB_to_gv(XS, XB, tx, ty, tz, wvln) gv = ret[0] dg0_dt = ret[1], ret[4], ret[7] dg1_dt = ret[2], ret[5], ret[8] dg2_dt = ret[3], ret[6], ret[9] hklr = np.dot(gr.ubi, gv) hkli = np.round(hklr) hkle = hklr - hkli scor = np.sqrt((hkle * hkle).sum(axis=0)) #print "before",len(pks),pks if tol is not None: use = np.compress(scor < tol, np.arange(len(gv[0]))) #print "after",len(pks),pks gr.pks = pks[use] else: use = np.arange(len(gr.pks), dtype=int) #print "score = ", scor[pks].sum()/len(pks), len(pks), tol # peaks to use are those with scor OK # # UB.h = gvcalc # dg/dUB = h # dg/dT = found above gcalc = np.dot(gr.UB, hkli) diff = np.take(gv - gcalc, use, axis=1) # print diff.shape, pks.shape # gv[0],[1],[2] = 3 # tx, ty, ty = 3 # UB00 ... UB22 = 9 # want derivative of diff w.r.t each variable grads = np.zeros((12, 3, len(use))) for i in range(3): for j in range(3): # tx, ty, tz #print 1+j*3+i, #print ret[1+j*3+i].shape grads[j, i] = ret[1 + j * 3 + i][use] # print grads[j,i] for j in range(3): # gx = 0h + 1k + 2l # gy = 3h + 4k + 5l # gz = 6h + 7k + 8l # i is gx, gy, gz # j is ub elements grads[3 + j + i * 3, i] = hkli[j][use] # print grads[3+j+i*3,i] # grains = 12, xyz, pks mtx = np.zeros((12, 12)) for i in range(12): for j in range(i, 12): for k in range(3): mtx[i, j] += (grads[i, k, :] * grads[j, k, :]).sum() if j != i: mtx[j, i] = mtx[i, j] # mtx = np.dot( grads, grads.T) # vector, outer, ? rhs = np.zeros(12) for i in range(12): for k in range(3): rhs[i] += (grads[i, k] * diff[k]).sum() #print mtx # print rhs imt = np.linalg.inv(mtx) shifts = np.dot(imt, rhs) tx = tx - shifts[0] ty = ty - shifts[1] tz = tz - shifts[2] gr.translation = [tx, ty, tz] ub = gr.UB.ravel() np.add(ub, shifts[3:], ub) gr.set_ubi(np.linalg.inv(np.reshape(ub, (3, 3)))) gr.npks = len(use) #1/0 return gr
if __name__ == "__main__": import sys from ImageD11.indexing import ubitocellpars o = fittrans(sys.argv[1], sys.argv[2]) gl = grain.read_grain_file(sys.argv[3]) gref = gl[0] import time start = time.time() gfl = [] ng = 0 # Take old peak assignments: if 1: print("Using existing peak assignments") inds = np.arange(o.colfile.nrows, dtype=int) for i, gref in enumerate(gl): gref.pks = np.compress(o.colfile.labels == i, inds) tols = [ None, ] * 3 else: tols = [0.05, 0.01, 0.0075] for gref in gl: for ii, tol in enumerate(tols): #print gref.translation gref = o.refine(gref, tol=tol) #print ii, gref.translation, gref.npks, #print i,gref.npks # gref.pks = None # re-assign after convergence # gref = o.refine( gref, tol=0.0075) gfl.append(gref)
dataBlockingResultsFile + ' already exists but is empty...It will be replaced with a new file.\n' ) fe = 0 else: print(dataBlockingResultsFile + ' does not exists. It will be created.\n') fe = 0 if fe == 0: kk = kk + 1 print("Analyzing file: " + tf + "\n") thermo_data_raw = np.genfromtxt(tf, names=True) condition = np.logical_and(eqData[:, 0] == P, eqData[:, 1] == T) eqSteps = np.compress(condition, eqData, axis=0)[0, 2].astype(int) uncorrelatedBlockSize = np.compress(condition, eqData, axis=0)[0, 3].astype(int) LEstartSteps = np.compress(condition, eqData, axis=0)[0, 4].astype(int) LEstartSteps = max(LEstartSteps, eqSteps) printInterval = int(thermo_data_raw[1]['Step'] - thermo_data_raw[0]['Step']) le_data_raw = thermo_data_raw[int(LEstartSteps / printInterval) + 1:][['Step', 'lE']] thermo_data_raw = thermo_data_raw[int(eqSteps / printInterval) + 1:] #for rawSampleNumber in range(np.size(thermo_data_raw)): #print("rawSampleNumber: " + str(rawSampleNumber) + " rawSample: " + str(thermo_data_raw[rawSampleNumber])) #energy_tot_mean = np.mean(thermo_data_raw[:]['Energy']) #print("Total mean energy: " + str(energy_tot_mean));
MBR_pheno_input = MBR_pheno_input[skitzo_class != 0] MBR_pheno_h = read_header( path + "/data_encoded/phenotypes_age/mbr_cat_headers_age.txt") MBR_pheno, MBR_pheno_input, MBR_pheno_h = remove_not_obs_cat( MBR_pheno, MBR_pheno_input, MBR_pheno_h, 0.01) sibling_pheno, sibling_pheno_input = read_cat( path + "/data_encoded/input/sibling_cat.npy") sibling_pheno = sibling_pheno[skitzo_class != 0] sibling_pheno_input = sibling_pheno_input[skitzo_class != 0] sibling_pheno_h = read_header( path + "/data_encoded/phenotypes_age/sibling_cat_headers.txt") sibling_pheno, sibling_pheno_input, sibling_pheno_h = remove_not_obs_cat( sibling_pheno, sibling_pheno_input, sibling_pheno_h, 0.01) sibling_pheno = np.compress((sibling_pheno != 0).sum(axis=(0, 1)), sibling_pheno, axis=2) # combine MBR and sibling #MBR_sibling = np.concatenate((MBR_pheno, sibling_pheno), axis=1) #MBR_sibling_h = np.concatenate((MBR_pheno_h, sibling_pheno_h)) ## load in genotype geno, geno_input = read_cat(path + "/data_encoded/input/genotypes_all.npy") geno = geno[skitzo_class != 0] geno_input = geno_input[skitzo_class != 0] geno_h = read_header(path + "/data_encoded/genomics/genotypes_headers_all.txt") geno, geno_input, geno_h = remove_not_obs_ordinal(geno, geno_input, geno_h, 0.01) hla_pheno, hla_pheno_input = read_cat(path +
if (export_mesh): m.export_to_vtk('mesh.vtk') print('\nYou can view the mesh for instance with') print('mayavi2 -d mesh.vtk -f ExtractEdges -m Surface \n') # Integration method used # mim = gf.MeshIm(m, gf.Integ('IM_PYRAMID_COMPOSITE(IM_TETRAHEDRON(6))')) mim = gf.MeshIm(m, gf.Integ('IM_PYRAMID(IM_GAUSS_PARALLELEPIPED(3,3))')) # mim = gf.MeshIm(m, gf.Integ('IM_TETRAHEDRON(5)')) # Boundary selection flst = m.outer_faces() fnor = m.normal_of_faces(flst) tleft = abs(fnor[1, :] + 1) < 1e-14 ttop = abs(fnor[0, :] - 1) < 1e-14 fleft = np.compress(tleft, flst, axis=1) ftop = np.compress(ttop, flst, axis=1) fneum = np.compress(True - ttop - tleft, flst, axis=1) # Mark it as boundary DIRICHLET_BOUNDARY_NUM1 = 1 DIRICHLET_BOUNDARY_NUM2 = 2 NEUMANN_BOUNDARY_NUM = 3 m.set_region(DIRICHLET_BOUNDARY_NUM1, fleft) m.set_region(DIRICHLET_BOUNDARY_NUM2, ftop) m.set_region(NEUMANN_BOUNDARY_NUM, fneum) # Interpolate the exact solution (Assuming mfu is a Lagrange fem) Ue = mfu.eval('y*(y-1)*x*(x-1)+x*x*x*x*x') # Interpolate the source term
def kaplan_meier_estimator(event, time_exit, time_enter=None, time_min=None, reverse=False): """Kaplan-Meier estimator of survival function. See [1]_ for further description. Parameters ---------- event : array-like, shape = (n_samples,) Contains binary event indicators. time_exit : array-like, shape = (n_samples,) Contains event/censoring times. time_enter : array-like, shape = (n_samples,), optional Contains time when each individual entered the study for left truncated survival data. time_min : float, optional Compute estimator conditional on survival at least up to the specified time. reverse : bool, optional, default: False Whether to estimate the censoring distribution. When there are ties between times at which events are observed, then events come first and are subtracted from the denominator. Only available for right-censored data, i.e. `time_enter` must be None. Returns ------- time : array, shape = (n_times,) Unique times. prob_survival : array, shape = (n_times,) Survival probability at each unique time point. If `time_enter` is provided, estimates are conditional probabilities. Examples -------- Creating a Kaplan-Meier curve: >>> x, y = kaplan_meier_estimator(event, time) >>> plt.step(x, y, where="post") >>> plt.ylim(0, 1) >>> plt.show() References ---------- .. [1] Kaplan, E. L. and Meier, P., "Nonparametric estimation from incomplete observations", Journal of The American Statistical Association, vol. 53, pp. 457-481, 1958. """ event, time_enter, time_exit = check_y_survival(event, time_enter, time_exit, allow_all_censored=True) check_consistent_length(event, time_enter, time_exit) if time_enter is None: uniq_times, n_events, n_at_risk, n_censored = _compute_counts( event, time_exit) if reverse: n_at_risk -= n_events n_events = n_censored else: if reverse: raise ValueError( "The censoring distribution cannot be estimated from left truncated data" ) uniq_times, n_events, n_at_risk = _compute_counts_truncated( event, time_enter, time_exit) # account for 0/0 = nan ratio = numpy.divide(n_events, n_at_risk, out=numpy.zeros(uniq_times.shape[0], dtype=float), where=n_events != 0) values = 1.0 - ratio if time_min is not None: mask = uniq_times >= time_min uniq_times = numpy.compress(mask, uniq_times) values = numpy.compress(mask, values) y = numpy.cumprod(values) return uniq_times, y
def qtapr(ballots, weights, cnames, numseats, verbose=0, use_mj=True, use_two_q=False): """Run quota threshold approval rating method (MJ-style or Bucklin-style) to elect <numseats> winners in a Droop proportional multiwnner election. """ numballots, numcands = np.shape(ballots) ncands = numcands numvotes = weights.sum() numvotes_orig = float(numvotes) # Force a copy quota = droopquota(numvotes, numseats) maxscore = int(ballots.max()) cands = np.arange(numcands) winners = [] maxscorep1 = maxscore + 1 factor_array = [] qthresh_array = [] for seat in range(numseats): if verbose > 0: print("- " * 30, "\nStarting count for seat", seat + 1) print("Number of votes:", myfmt(numvotes)) # ---------------------------------------------------------------------- # Tabulation: # ---------------------------------------------------------------------- # Score and Cumulative Score arrays (summing downward from maxscore) S, T = tabulate_score_from_ratings(ballots, weights, maxscore, ncands) (winner, winsum, factor, ranking, ratings) = aqt(maxscore, quota, ncands, cands, S, T, use_mj=use_mj, use_two_q=use_two_q) winner_quota_threshold = ratings[winner][0] # Seat the winner, then eliminate from candidates for next count if verbose: print("\n-----------\n*** Seat {}: {}\n-----------\n".format( seat + 1, cnames[winner])) if verbose > 1: print("QTAR ranking for this seat:") if use_mj: if use_two_q: for c in ranking: r, twoqavg, *rest = ratings[c] print("\t{}:({},{},{})".format( cnames[c], r, myfmt(twoqavg), ",".join([ "({},{})".format(s, myfmt(t)) for s, t in rest ]))) else: for c in ranking: r, *rest = ratings[c] print("\t{}:({},{})".format( cnames[c], r, ",".join([ "({},{})".format(s, myfmt(t)) for s, t in rest ]))) else: if use_two_q: for c in ranking: r, twoqavg, t = ratings[c] print("\t{}:({},{},{})".format( cnames[c], r, myfmt(twoqavg), myfmt(t))) else: for c in ranking: r, t = ratings[c] print("\t{}:({},{})".format( cnames[c], r, myfmt(t))) print("") if (seat < numseats): winners += [winner] cands = np.compress(cands != winner, cands) weights = np.multiply( weights, np.where(ballots[..., winner] < winner_quota_threshold, 1, factor)) numvotes = weights.sum() scorerange = np.arange(maxscorep1) factor_array.append(factor) qthresh_array.append(winner_quota_threshold) # Reweight ballots: winscores = ballots[..., winner] if verbose: print("Winner's votes per rating: ", (", ".join([ "{}:{}".format(j, myfmt(f)) for j, f in zip(scorerange[-1:0:-1], S[-1:0:-1, winner]) ]))) print("After reweighting ballots:") print("\tQuota: {}%".format(myfmt(quota / numvotes_orig * 100))) print(("\tWinner's quota approval threshold rating " "before reweighting: {}%").format( myfmt((winsum / numvotes_orig) * 100))) print("\tReweighting factor: ", factor) print(("\tPercentage of vote remaining " "after reweighting: {}%").format( myfmt((numvotes / numvotes_orig) * 100))) if verbose > 1 and numseats > 1: print("- " * 30 + "\nReweighting factors for all seat winners:") for w, f, qt in zip(winners, factor_array, qthresh_array): print("\t{} : ({}, {})".format(cnames[w], myfmt(qt), myfmt(f))) if verbose > 3 and numseats > 1: print("- " * 30 + "\nRemaining ballots and weights:") print("{},{}".format("weight", ','.join(cnames))) for w, ballot in zip(weights, ballots): print("{},{}".format(myfmt(w), ','.join([str(b) for b in ballot]))) return (winners)
def active_from_full(self, joints):
    return np.compress(self.active_links_mask, joints, axis=0)
def iFAB_PCL_Sequence( Points4D, pltFlag = 0 ): N = Points4D.shape[0]; #N = size(Points4D,1); Seq = 0; #Seq = 1; SequenceIx = np.arange(0,N); SortedSequence = np.zeros(N); # Sequentialize Data from Index Marker SeqData = Points4D[:,3]; SeqData1 = np.roll(SeqData,-1); SeqDiff = (SeqData - SeqData1); # Find Difference between adjacent values SeqDiff = np.delete(SeqDiff,[SeqDiff.size - 1]); # Identify huge jumps as folds FoldPos = (SeqDiff > 1024); Folds = sum(FoldPos); CarryOverFlag = 0; IxBuffer = np.array([0]); CarryOverBuffer = np.array([0,0,0,0]); if (Folds > 0): print "\t\t Total Folds Identified : ", Folds, "\n"; FoldIx = SequenceIx[FoldPos]; F_ix = np.arange(0,FoldIx.size) Fd = np.roll(FoldIx,-1); Overlapped = (abs(FoldIx - Fd) < 100); Overlapped[-1] = 0; for i in xrange(0,Folds): if np.logical_not((Overlapped[i])): BufferData = Points4D[Seq:(FoldIx[i] + 1),3]; # Get first set I = np.argsort(BufferData); # Sort picked fold SortedSequence[Seq:FoldIx[i] + 1] = I + Seq; # Save sorted fold Seq = FoldIx[i] + 1; # Update fold index if CarryOverFlag: # if previous folds were redundant act accordingly In_Ix = np.arange(0,IxBuffer.size - 1); Points4D = np.insert(Points4D,In_Ix + Seq,CarryOverBuffer[1:,:],0); # re-insert removd values into next fold CarryOverBuffer = np.array([[0],[0],[0],[0]]); # empty accumulated buffer FoldIx = FoldIx + IxBuffer.size - 1; # update index values IxBuffer = np.array([0]); #reset redundant fold count if (i==(Folds-1)): # If last fold index, sort the rest I = np.argsort(Points4D[Seq:,3]); SortedSequence[Seq:] = Seq + I; CarryOverFlag = 0; CarryOverBuffer = np.array([0,0,0,0]); else: CarryOverFlag = 1; # Mark for adding values into next fold IxBuffer = np.hstack((IxBuffer,FoldIx[i]+1)); # Monitor fold overlaps CarryOverBuffer = np.vstack((CarryOverBuffer,Points4D[FoldIx[i]+1,:])) # Accumulate Values at overlapping folds B = np.zeros([np.size(Points4D,0),1]); # remove the values at overlapping folds B = B+1; B[FoldIx[i]+1] = 0; B = B.flatten(); Points4D = np.compress(B,Points4D,0); FoldIx = FoldIx - 1; # update fold index after popping a value else: print "\t\t No Folds Identified" I = np.argsort(Points4D[:,3]); SortedSequence = I; OutOfSequence = sum(SortedSequence != SequenceIx); print "\t\t Points out of Sequence: ", OutOfSequence, "\n"; Points3D = Points4D[np.int64(SortedSequence),0:3]; return Points3D
def imageSwathVar(granules, variable, scaleFactor, title, outFile, filterMin=None, filterMax=None, scaleMin=None, scaleMax=None, imageWidth=None, imageHeight=None, plotType='map', projection='cyl', markerSize=10, **options): if filterMin == 'auto': filterMin = None if filterMax == 'auto': filterMax = None # files = [localize(url) for url in granules if url != 'None'] files = granules imageFiles = [] lonLatBounds = [] for i, file in enumerate(files): print 'imageSwathVar: Reading %s: %s' % (file, variable) localFile = localize(file, retrieve=False) if i == 0: swath = hdfeos.swaths(file)[0] # geoFields = hdfeos.swath_geo_fields(file, swath) lat = hdfeos.swath_field_read(file, swath, 'Latitude') lon = hdfeos.swath_field_read(file, swath, 'Longitude') ### time = hdfeos.swath_field_read(file, swath, 'Time') ### pressure = hdfeos.swath_field_read(file, swath, '???') if N.minimum.reduce(lon.flat) < -360. or N.minimum.reduce( lat.flat) < -90.: useImageMap = False # have missing values in lat/lon coord variables else: useImageMap = True dataFields = hdfeos.swath_data_fields(file, swath) if '[' not in variable: varName = variable else: varName, slice = variable.split('[') if varName not in dataFields: die('%s not a variable in %s' % (variable, file)) if '[' not in variable: var = hdfeos.swath_field_read(file, swath, variable) * float(scaleFactor) else: vals = hdfeos.swath_field_read(file, swath, varName) var = eval('['.join(('vals', slice))) var = var * float(scaleFactor) print 'imageSwathVar: Variable range: %f -> %f' % (min( min(var)), max(max(var))) if plotType != 'map' or not useImageMap: lat = lat.flat lon = lon.flat var = var.flat if filterMin is not None or filterMax is not None: if filterMin is not None and filterMax is None: cond = N.greater(var, float(filterMin)) elif filterMin is None and filterMax is not None: cond = N.less(var, float(filterMax)) else: cond = N.logical_and(N.greater(var, float(filterMin)), N.less(var, float(filterMax))) if plotType == 'map' and useImageMap: lat = MA.masked_where(cond, lat, copy=0) lon = MA.masked_where(cond, lon, copy=0) var = MA.masked_where(cond, var, copy=0) else: lat = N.compress(cond, lat.flat) lon = N.compress(cond, lon.flat) var = N.compress(cond, var.flat) lonLatBound = (min(min(lon)), min(min(lat)), max(max(lon)), max(max(lat))) lonLatBounds.append(lonLatBound) if plotType == 'map': imageFile = localFile + '_image.png' if useImageMap: upOrDown = 'upper' if lat[0, 0] < lat[-1, 0]: upOrDown = 'lower' if lon[0, 0] > lon[0, -1]: var = fliplr(var) image2(var, scaleMin, scaleMax, imageFile, upOrDown=upOrDown, **options) # plainImage2(var, imageFile) else: marksOnMap(lon, lat, var, scaleMin, scaleMax, imageWidth, imageHeight, imageFile, projection, autoBorders=True, title=title + ' ' + file, sizes=markerSize * markerSize, **options) elif plotType == 'hist': imageFile = localFile + '_aot_hist.png' hist(var, 50, imageFile) else: die("plotSwathVar: plotType must be 'map' or 'hist'") imageFiles.append(imageFile) print "imageSwathVar results:", imageFiles return (imageFiles, lonLatBounds)
def errore(img, imgpsf, coordlist, size, truemag, fwhm0, leng0, _show, _interactive, _numiter, z11, z22, midpt, nax, nay, xbgord0, ybgord0, _recenter, apco0, dmax, dmin): import lsc import os, sys, re, string from numpy import array, mean, std, compress, average from pyraf import iraf if not _numiter: _numiter = 3 dartf = 100 while dartf >= size - 1: if _interactive: artfac0 = raw_input( '>>> Dispersion of artificial star positions (in units of FWHM) [1] ' ) if not artfac0: artfac0 = 1 else: artfac0 = 1 try: artfac0 = float(artfac0) if float(artfac0) >= size - 1: print '!!! WARNING: ' + str( artfac0) + ' too large (max ' + str(size) + '- 1)' print 'try again....' else: dartf = artfac0 except: print '#### WARNING: ' + str(artfac0) + ' should be a number !!!!' print 'try again....' lsc.util.delete("tmpar?") lsc.util.delete('artskyfit.fits') os.system('cp skyfit.fits artskyfit.fits') i = 0 tmpart = [] while i <= 8: lsc.util.delete( "reserr.fit?,artbg.fit?,artstar.fit?,artres.fit?,artfit.fit?") artrad = fwhm0 / 2. #artseed = artseed+1234 artx = int(i / 3.) - 1 if i <= 2: arty = artx + i if 3 <= i <= 5: arty = artx - 1 + i - 3 if i >= 6: arty = artx - 2 + i - 6 ff = open(img + ".sn.coo", 'r') ss = ff.readline() ff.close() xbb = float(string.split(ss)[0]) ybb = float(string.split(ss)[1]) xbb = xbb + artx * fwhm0 * artfac0 ybb = ybb + arty * fwhm0 * artfac0 lsc.util.delete(coordlist) ff = open(coordlist, 'w') ff.write(str(xbb) + ' ' + str(ybb) + ' ' + str(truemag[0]) + " 1") ff.close() xb1 = int(float(xbb) - fwhm0 * float(leng0) / 2) xb2 = int(float(xbb) + fwhm0 * float(leng0) / 2) yb1 = int(float(ybb) - fwhm0 * float(leng0) / 2) yb2 = int(float(ybb) + fwhm0 * float(leng0) / 2) sec = "1 " + str(xb1) + " 1 " + str(nay) + '\n' sec = sec + str(xb2) + ' ' + str(nax) + " 1 " + str(nay) + '\n' sesc = sec + str(xb1) + ' ' + str(xb2) + " 1 " + str(yb1) + '\n' sec = sec + str(xb1) + ' ' + str(xb2) + ' ' + str(yb2) + ' ' + str( nay) + '\n' ff = open('sec', 'w') ff.write(sec) ff.close() lsc.util.delete("reserr.ar?") lsc.util.delete("artlist.ma?") lsc.util.delete("artsky.fit?") lsc.util.delete("artbg.fit?") lsc.util.delete("artbgs.fit?") lsc.util.delete("artsn.fit?") lsc.util.delete("artres.fit?") lsc.util.delete("artlist.al?") iraf.addstar("artskyfit", coordlist, imgpsf, "reserr", nstar=1, veri='no', simple='yes', verb='no') # reserr = skyfit + artificial star ######## inp = "artbg.fits[" + str(xb1) + ":" + str(xb2) + "," + str( yb1) + ":" + str(yb2) + "]" out = "artsky.fits[" + str(xb1) + ":" + str(xb2) + "," + str( yb1) + ":" + str(yb2) + "]" iraf.imsurfit("reserr", "artbg", xorder=xbgord0, yorder=ybgord0, regions="section", section="sec") midpt = np.mean(fits.getdata('artbg.fits')) iraf.imcopy('reserr.fits', 'artsky.fits') iraf.imcopy(inp, 'artbgs.fits') iraf.imcopy("artbgs.fits", out) iraf.imarith("reserr", "-", "artsky", "artsn", calctype="r", pixtype="r", verb='no') iraf.imarith("artsn", "+", midpt, "artsn", verb='no') artap1, artap2, artap3, artmag1, artmag2, artmag3, dartmag1, dartmag2, dartmag3, artfitmag, arttruemag, artmagerr, artcentx, artcenty = \ fitsn(img,imgpsf,coordlist,_recenter,fwhm0,'reserr','artsn','artres',_show,_interactive,dmax,dmin,z11,z22,midpt,size,apco0) for ii in range(0, _numiter): lsc.util.delete("reserr.ar?") lsc.util.delete("artlist.ma?") lsc.util.delete("artsky.fit?") lsc.util.delete("artbg.fit?") lsc.util.delete("artbgs.fit?") lsc.util.delete("artsn.fit?") lsc.util.delete("artres.fit?") lsc.util.delete("artlist.al?") iraf.imsurfit("skyfit", "artbg", xorder=xbgord0, 
yorder=ybgord0, regions="section", section="sec") midpt = np.mean(fits.getdata('artbg.fits')) iraf.imcopy("reserr.fits", "artsky.fits") iraf.imcopy(inp, "artbgs.fits") iraf.imcopy("artbgs.fits", out) iraf.imarith("reserr", "-", "artsky", "artsn", calctype="r", pixtype="r", verb='no') iraf.imarith("artsn.fits", "+", midpt, "artsn.fits", verb='no') artap1, artap2, artap3, artmag1, artmag2, artmag3, dartmag1, dartmag2, dartmag3, artfitmag, arttruemag, artmagerr, artcentx, artcenty = \ fitsn(img,imgpsf,coordlist,_recenter,fwhm0,'reserr','artsn','artres',_show,_interactive,dmax,dmin,z11,z22,midpt,size,0) ####### if i == 0: era = 'yes' else: era = 'no' artx = .5 + .25 * artx arty = .5 + .25 * arty if _show: _tmp1, _tmp2, goon = lsc.util.display_image('skyfit.fits', 1, '', '', False, _xcen=artx, _ycen=arty, _xsize=.25, _ysize=.25, _erase=era) try: tmpart.append(float(arttruemag[0])) except: pass i = i + 1 for i in tmpart: print i print " ########## " try: media = mean(array(tmpart)) arterr = std(array(tmpart)) arterr2 = std( compress((average(tmpart) - std(tmpart) < array(tmpart)) & (array(tmpart) < average(tmpart) + std(tmpart)), array(tmpart))) except: media = 0 arterr = 0 arterr2 = 0 print '### average = %6.6s \t arterr= %6.6s ' % (str(media), str(arterr)) print '### %6.6s \t (error at 1 sigma rejection) ' % (str(arterr2)) lsc.util.delete( "reserr.fit?,artbg.fit?,artstar.fit?,artres.fit?,artfit.fit?,artskyfit.fit?" ) lsc.util.delete("reserr.ar?") lsc.util.delete("artlist.co?") return arterr2, arterr
def qta(maxscore, quota, ncands, remaining, S, T, use_mj=True, use_two_q=False): """Quota Threshold approval single-winner method, using either Majority Judgment style tie-breaker for the approval quota threshold (default) or ER-Bucklin-ratings style """ ratings = dict() twoq = quota * 2 for c in remaining: r1q_unset = True r1q = 0 r2q = 0 tt_surplus = 0. ss = S[..., c] tt = T[..., c] s = 0 for r in range(maxscore, -1, -1): s += ss[r] * r if r1q_unset and (tt[r] > quota): r1q_unset = False r1q = r if not use_two_q: # If not using the two-quota average score tie-breaker, # leading part of the AQT score is the quota threshold rating ratings[c] = (r1q, ) break if tt[r] > twoq: r2q = r tt_surplus = tt[r2q] - twoq break if use_two_q: # leading part of AQT score is quota threshold rating and average score # in the top two quota blocks ratings[c] = (r1q, (s - tt_surplus * r) / twoq) elif r1q_unset: # If not using the two-quota average score tie-breaker, # leading part of the AQT score is the quota threshold rating ratings[c] = (r1q, ) scores = np.arange(maxscore + 1) if use_mj: # Majority Judgment style approval quota threshold for c in remaining: ss = S[..., c] tt = T[..., c] dd = abs(tt - quota) ratings[c] = (*list(ratings[c]), *[(x, tt[x]) for x in np.array( sorted(np.compress(ss > 0, scores), key=(lambda x: dd[x])))]) else: # ER-Bucklin-ratings style approval quota threshold for c in remaining: tt = T[..., c] ratings[c] = (*list(ratings[c]), tt[r]) ranking = sorted(remaining, key=(lambda c: ratings[c]), reverse=True) winner = ranking[0] winsum = T[ratings[winner][0], winner] if winsum >= quota: factor = (1. - quota / winsum) else: factor = 0 return (winner, winsum, factor, ranking, ratings)
def ymax(self, axis='s'):
    c, i = divmod(self.icount - 1, self.samples)
    data = np.compress(self.keep, self.y, axis=1)
    return np.max(data)
def compress(condition, a, axis=0):
    return N.compress(condition, a, axis)
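# For a 1-D boolean condition, np.compress along an axis is equivalent to
# boolean indexing on that axis; a quick self-contained check (independent of
# the `N` alias used above):
import numpy as np

a = np.arange(12).reshape(3, 4)
cond = np.array([True, False, True])
assert np.array_equal(np.compress(cond, a, axis=0), a[cond])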
# volatility process paths v = SRD_generate_paths(x_disc, v0, kappa_v, theta_v, sigma_v, T, M, I, rand, 2, cho_matrix) # index level process paths S = H93_index_paths(S0, r, v, 1, cho_matrix) for K in k_list: # strikes # inner value matrix h = np.maximum(K - S, 0) # value/cash flow matrix V = np.maximum(K - S, 0) for t in xrange(M - 1, 0, -1): df = np.exp(-(r[t] + r[t + 1]) / 2 * dt) # select only ITM paths itm = np.greater(h[t], 0) relevant = np.nonzero(itm) rel_S = np.compress(itm, S[t]) no_itm = len(rel_S) if no_itm == 0: cv = np.zeros((I), dtype=np.float) else: rel_v = np.compress(itm, v[t]) rel_r = np.compress(itm, r[t]) rel_V = (np.compress(itm, V[t + 1]) * np.compress(itm, df)) matrix = np.zeros((D + 1, no_itm), dtype=np.float) matrix[10] = rel_S * rel_v * rel_r matrix[9] = rel_S * rel_v matrix[8] = rel_S * rel_r matrix[7] = rel_v * rel_r matrix[6] = rel_S ** 2 matrix[5] = rel_v ** 2
def average(self, axis='s'):
    """average of the whole curve"""
    c, i = divmod(self.icount - 1, self.samples)
    data = np.compress(self.keep, self.y, axis=1)
    return np.average(data)
def plot_3d_comp_Poisson(model, data, vmin=None, vmax=None, resid_range=None, fig_num=None, pop_ids=None, residual='Anscombe', adjust=True): """ Poisson comparison between 3d model and data. model: 3-dimensional model SFS data: 3-dimensional data SFS vmin, vmax: Minimum and maximum values plotted for sfs are vmin and vmax respectively. resid_range: Residual plot saturates at +- resid_range. fig_num: Clear and use figure fig_num for display. If None, an new figure window is created. pop_ids: If not None, override pop_ids stored in Spectrum. residual: 'Anscombe' for Anscombe residuals, which are more normally distributed for Poisson sampling. 'linear' for the linear residuals, which can be less biased. adjust: Should method use automatic 'subplots_adjust'? For advanced manipulation of plots, it may be useful to make this False. """ if data.folded and not model.folded: model = model.fold() masked_model, masked_data = Numerics.intersect_masks(model, data) if fig_num is None: f = pylab.gcf() else: f = pylab.figure(fig_num, figsize=(8, 10)) pylab.clf() if adjust: pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.95, right=0.95) modelmax = max(masked_model.sum(axis=sax).max() for sax in range(3)) datamax = max(masked_data.sum(axis=sax).max() for sax in range(3)) modelmin = min(masked_model.sum(axis=sax).min() for sax in range(3)) datamin = min(masked_data.sum(axis=sax).min() for sax in range(3)) max_toplot = max(modelmax, datamax) min_toplot = min(modelmin, datamin) if vmax is None: vmax = max_toplot if vmin is None: vmin = min_toplot extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot] # Calculate the residuals if residual == 'Anscombe': resids = [Inference.\ Anscombe_Poisson_residual(masked_model.sum(axis=2-sax), masked_data.sum(axis=2-sax), mask=vmin) for sax in range(3)] elif residual == 'linear': resids =[Inference.\ linear_Poisson_residual(masked_model.sum(axis=2-sax), masked_data.sum(axis=2-sax), mask=vmin) for sax in range(3)] else: raise ValueError("Unknown class of residual '%s'." 
% residual) min_resid = min([r.min() for r in resids]) max_resid = max([r.max() for r in resids]) if resid_range is None: resid_range = max((abs(max_resid), abs(min_resid))) resid_extend = _extend_mapping[-resid_range <= min_resid, resid_range >= max_resid] if pop_ids is not None: if len(pop_ids) != 3: raise ValueError('pop_ids must be of length 3.') data_ids = model_ids = resid_ids = pop_ids else: data_ids = masked_data.pop_ids model_ids = masked_model.pop_ids if model_ids is None: model_ids = data_ids if model_ids == data_ids: resid_ids = model_ids else: resid_ids = None for sax in range(3): marg_data = masked_data.sum(axis=2 - sax) marg_model = masked_model.sum(axis=2 - sax) curr_ids = [] for ids in [data_ids, model_ids, resid_ids]: if ids is None: ids = ['pop0', 'pop1', 'pop2'] if ids is not None: ids = list(ids) del ids[2 - sax] curr_ids.append(ids) ax = pylab.subplot(4, 3, sax + 1) plot_colorbar = (sax == 2) plot_single_2d_sfs(marg_data, vmin=vmin, vmax=vmax, pop_ids=curr_ids[0], extend=extend, colorbar=plot_colorbar) pylab.subplot(4, 3, sax + 4, sharex=ax, sharey=ax) plot_single_2d_sfs(marg_model, vmin=vmin, vmax=vmax, pop_ids=curr_ids[1], extend=extend, colorbar=False) resid = resids[sax] pylab.subplot(4, 3, sax + 7, sharex=ax, sharey=ax) plot_2d_resid(resid, resid_range, pop_ids=curr_ids[2], extend=resid_extend, colorbar=plot_colorbar) ax = pylab.subplot(4, 3, sax + 10) flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()), resid.ravel()) ax.hist(flatresid, bins=20, normed=True) ax.set_yticks([]) pylab.show()
def set_data(self, data, **args): if args.get("skipIfSame", 1): if checksum(data) == checksum(self.raw_data): return self.domain_data_stat = [] self.attr_values = {} self.original_data = None self.scaled_data = None self.no_jittering_scaled_data = None self.valid_data_array = None self.raw_data = None self.have_data = False self.data_has_class = False self.data_has_continuous_class = False self.data_has_discrete_class = False self.data_class_name = None self.data_domain = None self.data_class_index = None if data is None: return full_data = data self.raw_data = data len_data = data and len(data) or 0 self.attribute_names = [attr.name for attr in full_data.domain] self.attribute_name_index = dict([ (full_data.domain[i].name, i) for i in range(len(full_data.domain)) ]) self.attribute_flip_info = {} self.data_domain = full_data.domain self.data_has_class = bool(full_data.domain.class_var) self.data_has_continuous_class = full_data.domain.has_continuous_class self.data_has_discrete_class = full_data.domain.has_discrete_class self.data_class_name = self.data_has_class and full_data.domain.class_var.name if self.data_has_class: self.data_class_index = self.attribute_name_index[ self.data_class_name] self.have_data = bool(self.raw_data and len(self.raw_data) > 0) self.domain_data_stat = getCached(full_data, DomainBasicStats, (full_data, )) sort_values_for_discrete_attrs = args.get( "sort_values_for_discrete_attrs", 1) for index in range(len(full_data.domain)): attr = full_data.domain[index] if attr.is_discrete: self.attr_values[attr.name] = [0, len(attr.values)] elif attr.is_continuous: self.attr_values[attr.name] = [ self.domain_data_stat[index].min, self.domain_data_stat[index].max ] if 'no_data' in args: return # the original_data, no_jittering_scaled_data and validArray are arrays # that we can cache so that other visualization widgets don't need to # compute it. 
The scaled_data on the other hand has to be computed for # each widget separately because of different # jitter_continuous and jitter_size values if getCached(data, "visualizationData"): self.original_data, self.no_jittering_scaled_data, self.valid_data_array = getCached( data, "visualizationData") else: no_jittering_data = np.c_[full_data.X, full_data.Y].T valid_data_array = no_jittering_data != np.NaN original_data = no_jittering_data.copy() for index in range(len(data.domain)): attr = data.domain[index] if attr.is_discrete: # see if the values for discrete attributes have to be resorted variable_value_indices = get_variable_value_indices( data.domain[index], sort_values_for_discrete_attrs) if 0 in [ i == variable_value_indices[attr.values[i]] for i in range(len(attr.values)) ]: # make the array a contiguous, otherwise the putmask # function does not work line = no_jittering_data[index].copy() indices = [ np.where(line == val, 1, 0) for val in range(len(attr.values)) ] for i in range(len(attr.values)): np.putmask(line, indices[i], variable_value_indices[attr.values[i]]) no_jittering_data[ index] = line # save the changed array original_data[ index] = line # reorder also the values in the original data no_jittering_data[index] = ( (no_jittering_data[index] * 2.0 + 1.0) / float(2 * len(attr.values))) elif attr.is_continuous: diff = self.domain_data_stat[ index].max - self.domain_data_stat[ index].min or 1 # if all values are the same then prevent division by zero no_jittering_data[index] = ( no_jittering_data[index] - self.domain_data_stat[index].min) / diff self.original_data = original_data self.no_jittering_scaled_data = no_jittering_data self.valid_data_array = valid_data_array if data: setCached(data, "visualizationData", (self.original_data, self.no_jittering_scaled_data, self.valid_data_array)) # compute the scaled_data arrays scaled_data = self.no_jittering_scaled_data # Random generators for jittering random = np.random.RandomState(seed=self.jitter_seed) rand_seeds = random.random_integers(0, 2**30 - 1, size=len(data.domain)) for index, rseed in zip(list(range(len(data.domain))), rand_seeds): # Need to use a different seed for each feature random = np.random.RandomState(seed=rseed) attr = data.domain[index] if attr.is_discrete: scaled_data[index] += (self.jitter_size / (50.0 * max(1, len(attr.values)))) * \ (random.rand(len(full_data)) - 0.5) elif attr.is_continuous and self.jitter_continuous: scaled_data[index] += self.jitter_size / 50.0 * ( 0.5 - random.rand(len(full_data))) scaled_data[index] = np.absolute( scaled_data[index]) # fix values below zero ind = np.where(scaled_data[index] > 1.0, 1, 0) # fix values above 1 np.putmask(scaled_data[index], ind, 2.0 - np.compress(ind, scaled_data[index])) self.scaled_data = scaled_data[:, :len_data]
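A small, self-contained sketch (synthetic arrays, not Orange tables) of the scaling used in set_data above: discrete values 0..k-1 are mapped to bin midpoints in (0, 1), and continuous values are min/max normalised with a guard against zero range.

import numpy as np

discrete = np.array([0., 1., 2.])              # attribute with k = 3 values
print((discrete * 2.0 + 1.0) / (2 * 3))        # [0.1667 0.5    0.8333]

cont = np.array([4.0, 6.0, 10.0])
diff = (cont.max() - cont.min()) or 1          # prevent division by zero
print((cont - cont.min()) / diff)              # [0.     0.3333 1.    ]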
def plot_2d_comp_Poisson(model, data, vmin=None, vmax=None, resid_range=None, fig_num=None, pop_ids=None, residual='Anscombe', adjust=True, saveplot=False, nomplot="plot_2d_comp_Poisson", showplot=True): """ Poisson comparison between 2d model and data. model: 2-dimensional model SFS data: 2-dimensional data SFS vmin, vmax: Minimum and maximum values plotted for sfs are vmin and vmax respectively. resid_range: Residual plot saturates at +- resid_range. fig_num: Clear and use figure fig_num for display. If None, an new figure window is created. pop_ids: If not None, override pop_ids stored in Spectrum. residual: 'Anscombe' for Anscombe residuals, which are more normally distributed for Poisson sampling. 'linear' for the linear residuals, which can be less biased. adjust: Should method use automatic 'subplots_adjust'? For advanced manipulation of plots, it may be useful to make this False. """ if data.folded and not model.folded: model = model.fold() masked_model, masked_data = Numerics.intersect_masks(model, data) if fig_num is None: f = pylab.gcf() else: f = pylab.figure(fig_num, figsize=(7, 7)) pylab.clf() if adjust: pylab.subplots_adjust(bottom=0.07, left=0.07, top=0.94, right=0.95, hspace=0.26, wspace=0.26) max_toplot = max(masked_model.max(), masked_data.max()) min_toplot = min(masked_model.min(), masked_data.min()) if vmax is None: vmax = max_toplot if vmin is None: vmin = min_toplot extend = _extend_mapping[vmin <= min_toplot, vmax >= max_toplot] if pop_ids is not None: data_pop_ids = model_pop_ids = resid_pop_ids = pop_ids if len(pop_ids) != 2: raise ValueError('pop_ids must be of length 2.') else: data_pop_ids = masked_data.pop_ids model_pop_ids = masked_model.pop_ids if masked_model.pop_ids is None: model_pop_ids = data_pop_ids if model_pop_ids == data_pop_ids: resid_pop_ids = model_pop_ids else: resid_pop_ids = None ax = pylab.subplot(2, 2, 1) plot_single_2d_sfs(masked_data, vmin=vmin, vmax=vmax, pop_ids=data_pop_ids, colorbar=False) ax.set_title('data') ax2 = pylab.subplot(2, 2, 2, sharex=ax, sharey=ax) plot_single_2d_sfs(masked_model, vmin=vmin, vmax=vmax, pop_ids=model_pop_ids, extend=extend) ax2.set_title('model') if residual == 'Anscombe': resid = Inference.Anscombe_Poisson_residual(masked_model, masked_data, mask=vmin) elif residual == 'linear': resid = Inference.linear_Poisson_residual(masked_model, masked_data, mask=vmin) else: raise ValueError("Unknown class of residual '%s'." % residual) if resid_range is None: resid_range = max((abs(resid.max()), abs(resid.min()))) resid_extend = _extend_mapping[-resid_range <= resid.min(), resid_range >= resid.max()] ax3 = pylab.subplot(2, 2, 3, sharex=ax, sharey=ax) plot_2d_resid(resid, resid_range, pop_ids=resid_pop_ids, extend=resid_extend) ax3.set_title('residuals') ax = pylab.subplot(2, 2, 4) flatresid = numpy.compress(numpy.logical_not(resid.mask.ravel()), resid.ravel()) ax.hist(flatresid, bins=20, normed=True) ax.set_title('residuals') ax.set_yticks([]) if saveplot: nomplot = nomplot + ".pdf" pylab.savefig(nomplot) if showplot: pylab.show()
# ==== Set the integration method ====
mim = gf.MeshIm(m, gf.Integ('IM_TETRAHEDRON(5)'))

# ==== Summary ====
print(' ==================================== \n Mesh details: ')
print(' Problem dimension:', mfu.qdim(), '\n Number of elements: ',
      m.nbcvs(), '\n Number of nodes: ', m.nbpts())
print(' Number of dof: ', mfu.nbdof(), '\n Element type: ',
      mfu.fem()[0].char())
print(' ====================================')

# ==== Boundaries detection ====
allPoints = m.pts()

# Bottom points and faces
cbot = (abs(allPoints[2, :]) < 1e-6)
pidbot = np.compress(cbot, list(range(0, m.nbpts())))
fbot = m.faces_from_pid(pidbot)
BOTTOM = 1
m.set_region(BOTTOM, fbot)

# Top points and faces
ctop = (abs(allPoints[2, :]) > dimZ - stepZ)
pidtop = np.compress(ctop, list(range(0, m.nbpts())))
ftop = m.faces_from_pid(pidtop)
TOP = 2
m.set_region(TOP, ftop)

# Left points and faces
cleft = (abs(allPoints[0, :]) < 1e-6)
pidleft = np.compress(cleft, list(range(0, m.nbpts())))
fleft = m.faces_from_pid(pidleft)
LEFT = 3
m.set_region(LEFT, fleft)
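The point-id selection idiom above (compressing an index range by a boolean condition) is equivalent to numpy.flatnonzero on that condition. A minimal sketch with synthetic coordinates, not the GetFEM mesh:

import numpy as np

z = np.array([0.0, 0.25, 0.0, 1.0, 0.0])            # stand-in for allPoints[2, :]
cbot = np.abs(z) < 1e-6
pidbot = np.compress(cbot, list(range(0, z.size)))
assert np.array_equal(pidbot, np.flatnonzero(cbot))  # [0, 2, 4]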
def from_contingency(self, cont, nan_adjustment):
    h_class = _entropy(np.sum(cont, axis=1))
    h_residual = _entropy(np.compress(np.sum(cont, axis=0), cont, axis=1))
    return nan_adjustment * (h_class - h_residual)
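A hedged sketch of the column-pruning step in from_contingency: using the column sums as the compress condition drops attribute values that never occur, so the conditional entropy is computed only over observed columns.

import numpy as np

cont = np.array([[3, 0, 2],
                 [1, 0, 4]])                 # class x attribute-value counts
kept = np.compress(np.sum(cont, axis=0), cont, axis=1)
print(kept)                                  # [[3 2] [1 4]] -- zero column removed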
def main(proteinfilename, ligandfilename): U1 = MDAnalysis.Universe(proteinfilename) proteins = U1.select_atoms('protein and not type HD') proteincoods = proteins.positions U2 = MDAnalysis.Universe(ligandfilename) polaratoms = U2.select_atoms( 'type N or type O or type F or type Cl or type Br') numpolaratoms = polaratoms.n_atoms Z = int(os.path.isfile('placedwaters.pdb')) if Z == 0: f1 = open('dockedwaters.pdb', 'w') f1.close() sys.exit('Empty file') U3 = MDAnalysis.Universe('placedwaters.pdb') trialwaters = U3.select_atoms('resname SOL and name OW') trialwatercoods = trialwaters.positions numtrialwaters = trialwatercoods.shape[0] waterscores = np.zeros((numtrialwaters), dtype=float) tempdist = MDAnalysis.lib.distances.distance_array(trialwatercoods, proteincoods) watprodist = np.amin(tempdist, axis=1) for i in xrange(0, numtrialwaters): if watprodist[i] < 3.6 and watprodist[i] > 2.00: comd = 'vina --receptor ' + proteinfilename + ' --num_modes 1 --exhaustiveness 20 --ligand water.pdbqt --size_x 0.5 --size_y 0.5 --size_z 0.5 --out waterout.pdbqt --center_x ' + str( trialwatercoods[i, 0]) + ' --center_y ' + str( trialwatercoods[i, 1]) + ' --center_z ' + str( trialwatercoods[i, 2]) os.system(comd) os.system("grep 'RESULT' waterout.pdbqt > water.txt") A = np.genfromtxt('water.txt', usecols=3, dtype=float) waterscores[i] = A os.remove('water.txt') os.remove('waterout.pdbqt') predictedwatercoods = np.compress(waterscores <= -0.6, trialwatercoods, axis=0) predictedwatercoods = np.float32(predictedwatercoods) numpredictedwaters = predictedwatercoods.shape[0] waterdata = np.genfromtxt('waterdetails.txt', dtype=int) predictedwaterscores1 = np.compress(waterscores <= -0.6, waterscores, axis=0) predictedwaterscores2 = np.reshape(predictedwaterscores1, (numpredictedwaters, 1)) ############################################################################################################## ############################################################################################################## if numpredictedwaters > 1: fit = scipy.cluster.hierarchy.fclusterdata(predictedwatercoods, 2.0, criterion='distance', metric='euclidean') fit = fit.astype(int) numclust = np.max(fit) temppredictedwatercoods = np.zeros((numclust, 3), dtype=float) temppredictedwatercoods = np.float32(temppredictedwatercoods) temppredictedwaterscores = np.zeros((numclust, 1), dtype=float) for i in xrange(1, numclust + 1): clusttemp = np.compress(fit == i, predictedwatercoods, axis=0) tempavg = np.mean(clusttemp, axis=0) temppredictedwatercoods[i - 1, :] = tempavg clusttemp2 = np.compress(fit == i, predictedwaterscores2, axis=0) tempavg2 = np.mean(clusttemp2, axis=0) temppredictedwaterscores[i - 1, 0] = tempavg2 elif numpredictedwaters <= 1: temppredictedwatercoods = predictedwatercoods.copy() temppredictedwaterscores = predictedwaterscores2.copy() ############################################################################################################## ############################################################################################################## allligand = U2.select_atoms('all') allligandcoods = allligand.positions numpredictedwaters = temppredictedwatercoods.shape[0] discardindex = np.zeros((numpredictedwaters, 1), dtype=float) count = 0 for i in range(0, numpolaratoms): if waterdata.size > 2: atomindex = waterdata[i, 0] allowedwaters = waterdata[i, 1] elif waterdata.size == 2: atomindex = waterdata[0] allowedwaters = waterdata[1] atomcoods = np.zeros((1, 3), dtype=float) atomcoods[0, :] = 
allligandcoods[atomindex, :].copy() atomcoods = np.float32(atomcoods) atwatdist = MDAnalysis.lib.distances.distance_array( temppredictedwatercoods, atomcoods) B = np.where(atwatdist < 3.1) mates = np.ravel_multi_index(B, atwatdist.shape) nummates = np.size(mates) matescores = temppredictedwaterscores[mates] if nummates > allowedwaters: numdiscardedwaters = nummates - allowedwaters for j in xrange(0, numdiscardedwaters): high = np.argmax(matescores) removedindex = mates[high] matescores = np.delete(matescores, high) mates = np.delete(mates, high) discardindex[count, 0] = removedindex count = count + 1 trimmeddiscardindex = discardindex[0:count, :].copy() trimmeddiscardindex = np.transpose(trimmeddiscardindex) trimmeddiscardindex = np.ndarray.astype(trimmeddiscardindex, dtype=int) clusteredwatercoods = np.delete(temppredictedwatercoods, trimmeddiscardindex, axis=0) finalwaterscores = np.delete(temppredictedwaterscores, trimmeddiscardindex, axis=0) writewaterfile('predictedwaters.pdb', clusteredwatercoods, finalwaterscores)
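A minimal, self-contained sketch (synthetic scores, not Vina output) of the row filtering used above: np.compress with axis=0 keeps the coordinate rows whose docking score passes the -0.6 cutoff.

import numpy as np

coords = np.array([[1.0, 2.0, 3.0],
                   [4.0, 5.0, 6.0],
                   [7.0, 8.0, 9.0]], dtype=np.float32)
scores = np.array([-0.9, -0.3, -0.7])
kept = np.compress(scores <= -0.6, coords, axis=0)
print(kept)        # rows 0 and 2 survive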
def despine(fig=None, ax=None, top=True, right=True, left=False, bottom=False, offset=None, trim=False): """Remove the top and right spines from plot(s). fig : matplotlib figure, optional Figure to despine all axes of, default uses current figure. ax : matplotlib axes, optional Specific axes object to despine. top, right, left, bottom : boolean, optional If True, remove that spine. offset : int or dict, optional Absolute distance, in points, spines should be moved away from the axes (negative values move spines inward). A single value applies to all spines; a dict can be used to set offset values per side. trim : bool, optional If True, limit spines to the smallest and largest major tick on each non-despined axis. Returns ------- None """ # Get references to the axes we want if fig is None and ax is None: axes = plt.gcf().axes elif fig is not None: axes = fig.axes elif ax is not None: axes = [ax] for ax_i in axes: for side in ["top", "right", "left", "bottom"]: # Toggle the spine objects is_visible = not locals()[side] ax_i.spines[side].set_visible(is_visible) if offset is not None and is_visible: try: val = offset.get(side, 0) except AttributeError: val = offset _set_spine_position(ax_i.spines[side], ('outward', val)) # Potentially move the ticks if left and not right: maj_on = any( t.tick1line.get_visible() for t in ax_i.yaxis.majorTicks ) min_on = any( t.tick1line.get_visible() for t in ax_i.yaxis.minorTicks ) ax_i.yaxis.set_ticks_position("right") for t in ax_i.yaxis.majorTicks: t.tick2line.set_visible(maj_on) for t in ax_i.yaxis.minorTicks: t.tick2line.set_visible(min_on) if bottom and not top: maj_on = any( t.tick1line.get_visible() for t in ax_i.xaxis.majorTicks ) min_on = any( t.tick1line.get_visible() for t in ax_i.xaxis.minorTicks ) ax_i.xaxis.set_ticks_position("top") for t in ax_i.xaxis.majorTicks: t.tick2line.set_visible(maj_on) for t in ax_i.xaxis.minorTicks: t.tick2line.set_visible(min_on) if trim: # clip off the parts of the spines that extend past major ticks xticks = ax_i.get_xticks() if xticks.size: firsttick = np.compress(xticks >= min(ax_i.get_xlim()), xticks)[0] lasttick = np.compress(xticks <= max(ax_i.get_xlim()), xticks)[-1] ax_i.spines['bottom'].set_bounds(firsttick, lasttick) ax_i.spines['top'].set_bounds(firsttick, lasttick) newticks = xticks.compress(xticks <= lasttick) newticks = newticks.compress(newticks >= firsttick) ax_i.set_xticks(newticks) yticks = ax_i.get_yticks() if yticks.size: firsttick = np.compress(yticks >= min(ax_i.get_ylim()), yticks)[0] lasttick = np.compress(yticks <= max(ax_i.get_ylim()), yticks)[-1] ax_i.spines['left'].set_bounds(firsttick, lasttick) ax_i.spines['right'].set_bounds(firsttick, lasttick) newticks = yticks.compress(yticks <= lasttick) newticks = newticks.compress(newticks >= firsttick) ax_i.set_yticks(newticks)
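A hypothetical usage sketch of despine() above. It assumes matplotlib is available and that the helper _set_spine_position referenced inside despine is defined alongside it (as in seaborn, where this function lives):

import numpy as np
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.plot(np.linspace(0, 1, 50), np.random.rand(50))
despine(ax=ax, offset=10, trim=True)   # drop top/right spines, trim to ticks
plt.show()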
def main(): parser = argparse.ArgumentParser( description= 'This script takes a single zarr zipstore, and estimates the contamination rate, providing a log' 'likelihood ratio vs the null model') parser.add_argument( '--input', required=True, help= 'Path to zarr file containing genotypes and allele depths, zipped Zarr file with data for a single sample.' 'This should follow the standard format of {sample}/{seqid}/calldata/GT and {sample}/{seqid}/calldata/AD.' ) parser.add_argument( '--sites', required=True, help= 'Path to zarr describing which sites in `input` were genotyped. This is used to match the `input` to the' 'allele frequencies below. variants/POS is required.') parser.add_argument( '--allele-frequencies', required=True, help= 'path to zarr file describing allele frequencies. This has two purposes: 1) to select SNPs to downsample' 'to, based on the `minimum_af` argument. 2) To provide a prior expectation on the frequency of genotypes.' 'The first level of the zarr file should be groups for seqids, with each containing `POS` (position) and' 'AF (allele frequencies). The shape of the AF array must be Nx4, where N is the size of the 1D POS array.' 'The order of alleles *must* correspond to the coding in the input data. There is no requirement to have a' 'similar shape to the input genotypes, although a minimum level of intersection is required!' ) parser.add_argument('--seqid', required=True, nargs='+', help='name of chromosome(s) or contig(s) to process. ') parser.add_argument('--output', required=True, help='path to output file stem') parser.add_argument('--downsample', required=False, default=20000, help='number of sites to consider.', type=int) parser.add_argument( '--minimum-af', required=False, default=0.05, help= 'minimum minor allele frequency in reference population to consider. Sites with higher MAF are more ' 'powerful at detecting contamination', type=float) parser.add_argument('--sequence-error-rate', required=False, default=1e-3, help='probability of observing a non REF/ALT base', type=float) parser.add_argument( '--minimum-coverage', required=False, default=10, help= 'minimum read depth to use. 
Low depths have low power to detect contamination', type=int) parser.add_argument('--plot', dest='plot', action='store_true') parser.add_argument('--no-plot', dest='plot', action='store_false') parser.add_argument('--log', dest='log', action='store_true') parser.add_argument('--no-log', dest='log', action='store_false') parser.set_defaults(plot=True, log=False) try: args = { "input": snakemake.input.input, "sites": snakemake.input.sites, "allele_frequencies": snakemake.input.allele_frequencies, "seqid": snakemake.params.seqid, "output": snakemake.params.stem, "minimum_af": snakemake.params.minimum_af, "minimum_coverage": snakemake.params.minimum_coverage, "sequence_error_rate": snakemake.params.seq_err_rate, "downsample": snakemake.params.downsample, "plot": snakemake.params.plot, "log": snakemake.params.log } log("Args read via snakemake") except NameError: args = vars(parser.parse_args()) log("Args read via command line") seqids = args['seqid'] sequence_error_rate = args['sequence_error_rate'] downsample_n = args["downsample"] minimum_minor_af = args["minimum_af"] output_csv = args['output'] + ".contamination.csv" output_png = args['output'] + ".allele_balance.png" output_log = args["output"] + ".{alpha}.log" sample_store = zarr.ZipStore(args["input"], mode="r") sample_callset = zarr.Group(sample_store) sites = zarr.ZipStore(args["sites"], mode="r") variant_sites = zarr.Group(sites) sample = next(iter(sample_callset)) concatenated_sample_callset, _ = concatenate_arrays( sample_callset[sample], seqids, paths=["calldata/GT", "calldata/AD"]) gt = allel.GenotypeArray(concatenated_sample_callset["calldata/GT"]) ad = concatenated_sample_callset["calldata/AD"] concatenated_sites, concatenated_site_shapes = concatenate_arrays( variant_sites, seqids, ["variants/POS"]) pos = concatenated_sites["variants/POS"] assert pos.shape[0] == gt.shape[0] == ad.shape[ 0], "Shape inconsistency. {0}, {1}, {2}".format( pos.shape, gt.shape, ad.shape) # load allele frequencies required to compute weights allele_frequencies_z = zarr.open_group(args['allele_frequencies'], "r") concatenated_af_arrays, concatenated_af_shapes = concatenate_arrays( allele_frequencies_z, seqids, ["POS", "AF"]) af_pos = concatenated_af_arrays["POS"] # This is a 2D array of the frequency of the ALT allele in some other dataset. af_val = concatenated_af_arrays["AF"] assert af_val.shape[ 1] == 4, "Allele frequencies must contain all 4 alleles, even if unobserved." # for the sample_gt: Keep if # a) in af, b) is_called and c) is_biallelic # step 1 find the intersection this works on multi indexes loc_gt, loc_af = locate_intersection(pos, concatenated_site_shapes, af_pos, concatenated_af_shapes) flt_af_val = np.compress(loc_af, af_val, axis=0) flt_gt = np.compress(loc_gt, gt, axis=0) flt_ad = np.compress(loc_gt, ad, axis=0) # now we need to filter both by is biallelic and is called. is_bial_ref_pop = np.count_nonzero(flt_af_val, axis=1) == 2 is_called = flt_gt.is_called()[:, 0] # compress the intersection by the AND of these keep_loc = is_called & is_bial_ref_pop alt_frequency_pass = np.compress(keep_loc, flt_af_val, axis=0) allele_depth_pass = np.compress(keep_loc, flt_ad, axis=0) # recode the allele depth to 0/1. # find the "alt" column. 
log("Ordering alleles by frequency for REF/ALT/ERR") min_cov_reached = allele_depth_pass[:, 0].sum( axis=1) >= args['minimum_coverage'] ix_cols_sort = np.argsort(alt_frequency_pass, axis=1)[:, ::-1] # indices of all rows ix_rows = np.arange(alt_frequency_pass.shape[0]) # apply the sorting operation allele_depth_pass_reordered = np.squeeze(allele_depth_pass)[ ix_rows[:, np.newaxis], ix_cols_sort] # Define allele counts: sum final 2 columns, representing ref/alt/error allele_depths = allele_depth_pass_reordered[:, :3] allele_depths[:, 2] = allele_depths[:, 2] + allele_depth_pass_reordered[:, 3] assert allele_depths.shape[1] == 3 # issue with some samples having a third allele (ie not in phase 2) discovered at high frequency # Filter sites where more than 10% of reads look like errors. probably_biallelic = allele_depth_pass_reordered[:, 2] < ( .1 * allele_depth_pass_reordered.sum(axis=1)) # step 2 create the 0/1/2 from the allele frequencies. major_af = alt_frequency_pass.max(axis=1) # select the values with the highest MAF. log("Selecting variants on which to perform analysis") while True: eligible = probably_biallelic & min_cov_reached & ( (1 - major_af) > minimum_minor_af) if eligible.sum() > downsample_n: break minimum_minor_af -= 0.01 if minimum_minor_af < 0: log("Insufficient variants meet criteria to compute contamination. n={0}, min={1}" .format(eligible.sum(), downsample_n)) break res = pd.DataFrame(index=[sample], columns=["LLR", "LL", "pc_contam"]) if eligible.sum() > downsample_n: log("Downsample from {0} to {1}".format(eligible.sum(), downsample_n)) ix_ds = np.sort( np.random.choice(np.where(eligible)[0], size=downsample_n)) major_af = np.take(major_af, ix_ds, axis=0) allele_depths = np.take(allele_depths, ix_ds, axis=0) genotype_weights = np.log(determine_weights(major_af)) log("estimating contamination...") xv = minimize_scalar(compute_likelihood, args=(sequence_error_rate, allele_depths, genotype_weights, args["log"], output_log), bounds=(0, 0.5), method="Bounded", options={"xatol": 1e-6}) # compute the likelihood at alpha = 0, to report likelihood ratio. null = compute_likelihood(0.0, sequence_error_rate, allele_depths, genotype_weights, args["log"], output_log) # return the llr / ll / estimate res.loc[sample] = -min(xv.fun - null, 0), -xv.fun, xv.x * 100 if args['plot']: plot_allele_balance(flt_gt, flt_ad, output_png, res.iloc[0]) res.to_csv(output_csv)
# NOTE: this is a fragment; the indented block below sits inside a try: that
# opens earlier in the original indexing script.
    n3 = np.sum(np.where(dr > t2, 1, 0))
    assert npks == n2, "debug scoring"
    assert n3 == len(all_gvecs) - npks, "debug scoring"
    print(l.r2c)
    print("Unit cell:", (6 * "%.6f ") % indexing.ubitocellpars(l.r2c))
    # Put this grain in the output list
    ubis.append(l.r2c)
    # Remove from the gvectors
    drlv2 = indexing.calc_drlv2(l.r2c, cur_gvecs)
    # print drlv2[:20]
    # print cur_gvecs.shape
    # print drlv2.shape, drlv2[:10], options.tol*options.tol
    cur_gvecs = rc_array.rc_array(
        np.compress(drlv2 > options.tol * options.tol, cur_gvecs, axis=0),
        direction='row')
    print("Lattice found, indexes", npks, "from all", all)
    print("Number of unindexed peaks remaining %d" % (len(cur_gvecs)))
    print("Current vector shape", cur_gvecs.shape)
    if len(ubis) > 0:
        indexing.write_ubi_file(options.outfile, ubis)
        print("Wrote to file", options.outfile)
    else:
        print("No unit cell found, sorry, please try again")
except:
    if len(ubis) > 0:
        indexing.write_ubi_file(options.outfile, ubis)
        print("Wrote to file", options.outfile)
    raise
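A synthetic sketch of the peak-removal step above: g-vector rows whose drlv2 falls within tolerance are treated as indexed and dropped, and the rest are kept for the next grain search. Values here are made up.

import numpy as np

gv = np.arange(12.0).reshape(4, 3)          # four g-vectors
drlv2 = np.array([0.0001, 0.5, 0.0002, 0.3])
tol = 0.05
remaining = np.compress(drlv2 > tol * tol, gv, axis=0)
print(remaining.shape)                      # (2, 3): two peaks left unindexed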
def run_one_perf_test(self):
    """Method to run a neutral benchmark on a uniform model with the
    previously selected feature set. We want to see which feature set is
    the best or carries the most information.
    """
    # Get data
    traindata, testdata = self.trainData, self.testData
    X, y = traindata

    # Check if the feature set has more than one feature
    if np.array(self.featset).ndim > 1:
        self.featset = self.featset[0]

    # Reduce our data set to the selected features
    X = np.compress(self.featset, X, axis=1)

    # Neutral model
    model = sklearn.linear_model.LogisticRegression(
        multi_class="multinomial", solver="newton-cg", fit_intercept=True
    )
    tuned_parameters = [{"C": np.logspace(-6, 4, 11)}]
    cv = 5

    # We use grid search to find good parameters
    gridsearch = sklearn.model_selection.GridSearchCV(
        model, tuned_parameters, scoring=None, n_jobs=1, cv=cv, iid=True
    )
    gridsearch.fit(X, y)
    est = gridsearch.best_estimator_

    # Record scores
    trainScore = est.score(X, y)
    traindec = est.decision_function(X)
    trainpredict = est.predict(X)
    trainy = y

    # Scores on the test set
    X_test, y_test = testdata
    X_test = np.compress(self.featset, X_test, axis=1)
    testpredict = est.predict(X_test)
    testscore = est.score(X_test, y_test)

    # We save the decision function, so we can calculate ROC curves later in
    # the analysis.
    # TODO: do we need the decision function? Are there alternatives for
    # ordinal regression?
    testdec = est.decision_function(X_test)
    testy = y_test

    result = Result_Performance(
        modelname=self.modelname,
        setname=self.setname,
        trainScore=trainScore,
        testScore=testscore,
        traindec=traindec,
        trainpredict=trainpredict,
        trainy=trainy,
        testdec=testdec,
        testpredict=testpredict,
        testy=testy,
    )
    self.result_performance = result
    return self
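A minimal sketch (toy data) of the feature-mask step above: a boolean feature set compressed along axis=1 selects columns, matching plain boolean indexing.

import numpy as np

X = np.arange(12).reshape(3, 4)
featset = np.array([True, False, True, False])
X_sel = np.compress(featset, X, axis=1)
assert np.array_equal(X_sel, X[:, featset])   # columns 0 and 2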
def cokernel(A, tol=1e-5):
    # Columns of U whose singular values are numerically zero span the
    # cokernel (left null space) of A.
    u, s, vh = np.linalg.svd(A)
    # np.complex was removed in recent NumPy releases; use the builtin type.
    sing = np.zeros(u.shape[1], dtype=complex)
    sing[:s.size] = s
    null_mask = (sing <= tol)
    return np.compress(null_mask, u, axis=1)
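A hedged check of cokernel(): for a matrix whose last rows are zero, the returned columns span the left null space, so conjugate-transposing them against A should give (numerically) zero.

import numpy as np

A = np.array([[1.0, 2.0],
              [0.0, 0.0],
              [0.0, 0.0]])
C = cokernel(A)
print(C.shape)                        # (3, 2): two cokernel vectors
assert np.allclose(C.conj().T @ A, 0.0)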
def clip_data(self, data):
    """ Returns a list of data values that are within the range.

    Implements AbstractDataRange.
    """
    return compress(self.mask_data(data), data, axis=0)
def vizq(_ra, _dec, catalogue, radius): ''' Query vizquery ''' _site = 'vizier.u-strasbg.fr' cat = { 'usnoa2': ['I/252/out', 'USNO-A2.0', 'Rmag'], '2mass': ['II/246/out', '2MASS', 'Jmag'], 'landolt': ['II/183A/table2', '', 'Vmag,B-V,U-B,V-R,R-I,Star,e_Vmag'], 'ucac4': [ 'I/322A/out', '', 'Bmag,Vmag,gmag,rmag,imag,e_Vmag,e_Bmag,e_gmag,e_rmag,e_imag,UCAC4' ], 'apass': [ 'II/336/apass9', '', "Bmag,Vmag,g'mag,r'mag,i'mag,e_Vmag,e_Bmag,e_g'mag,e_r'mag,e_i'mag" ], 'usnob1': ['I/284/out', 'USNO-B1.0', 'R2mag'], 'sdss7': ['II/294/sdss7', '', 'objID,umag,gmag,rmag,imag,zmag,gc'], 'sdss9': [ 'V/139/sdss9', '', 'objID,umag,gmag,rmag,imag,zmag,e_umag,e_gmag,e_rmag,e_imag,e_zmag,gc' ], 'sdss7': [ 'II/294/sdss7', '', 'objID,umag,gmag,rmag,imag,zmag,e_umag,e_gmag,e_rmag,e_imag,e_zmag,gc' ], 'sdss8': [ 'II/306/sdss8', '', 'objID,umag,gmag,rmag,imag,zmag,e_umag,e_gmag,e_rmag,e_imag,e_zmag,gc' ] } a=os.popen('vizquery -mime=tsv -site='+_site+' -source='+cat[catalogue][0]+\ ' -c.ra='+str(_ra)+' -c.dec='+str(_dec)+' -c.eq=J2000 -c.rm='+str(radius)+\ ' -c.geom=b -oc.form=h -sort=_RA*-c.eq -out.add=_RAJ2000,_DEJ2000 -out.max=10000 -out='+\ cat[catalogue][1]+' -out="'+cat[catalogue][2]+'"').read() print 'vizquery -mime=tsv -site='+_site+' -source='+cat[catalogue][0]+\ ' -c.ra='+str(_ra)+' -c.dec='+str(_dec)+' -c.eq=J2000 -c.rm='+str(radius)+\ ' -c.geom=b -oc.form=h -sort=_RA*-c.eq -out.add=_RAJ2000,_DEJ2000 -out.max=10000 -out='+\ cat[catalogue][1]+' -out="'+cat[catalogue][2]+'"' aa = a.split('\n') bb = [] for i in aa: if i and i[0] != '#': bb.append(i) _ra, _dec, _name, _mag = [], [], [], [] for ii in bb[3:]: aa = ii.split('\t') rr, dd = deg2HMS(ra=re.sub(' ', ':', aa[0]), dec=re.sub(' ', ':', aa[1]), round=False) _ra.append(rr) _dec.append(dd) _name.append(aa[2]) dictionary = {'ra': _ra, 'dec': _dec, 'id': _name} sss = string.split(cat[catalogue][2], ',') for ii in sss: dictionary[ii] = [] for ii in bb[3:]: aa = ii.split('\t') for gg in range(0, len(sss)): if sss[gg] not in ['UCAC4', 'id']: try: dictionary[sss[gg]].append(float(aa[2 + gg])) except: dictionary[sss[gg]].append(float(9999)) else: dictionary[sss[gg]].append(str(aa[2 + gg])) if catalogue in ['sdss7', 'sdss9', 'sdss8']: dictionary['u'] = dictionary['umag'] dictionary['g'] = dictionary['gmag'] dictionary['r'] = dictionary['rmag'] dictionary['i'] = dictionary['imag'] dictionary['z'] = dictionary['zmag'] dictionary['uerr'] = dictionary['e_umag'] dictionary['gerr'] = dictionary['e_gmag'] dictionary['rerr'] = dictionary['e_rmag'] dictionary['ierr'] = dictionary['e_imag'] dictionary['zerr'] = dictionary['e_zmag'] for key in dictionary.keys(): if key != 'r': dictionary[key] = np.compress( (np.array(dictionary['r']) < 19) & (np.array(dictionary['r'] > 10)), dictionary[key]) dictionary['r'] = np.compress((np.array(dictionary['r']) < 19) & (np.array(dictionary['r'] > 10)), dictionary['r']) elif catalogue == 'landolt': dictionary['B'] = np.array(dictionary['Vmag']) + np.array( dictionary['B-V']) dictionary['U'] = np.array(dictionary['B']) + np.array( dictionary['U-B']) dictionary['V'] = np.array(dictionary['Vmag']) dictionary['Verr'] = np.array(dictionary['e_Vmag']) dictionary['R'] = np.array(dictionary['Vmag']) - np.array( dictionary['V-R']) dictionary['I'] = np.array(dictionary['R']) - np.array( dictionary['R-I']) dictionary['id'] = np.array(dictionary['Star']) elif catalogue == 'ucac4': dictionary['B'] = np.array(dictionary['Bmag']) dictionary['V'] = np.array(dictionary['Vmag']) dictionary['g'] = np.array(dictionary['gmag']) dictionary['r'] = 
np.array(dictionary['rmag']) dictionary['i'] = np.array(dictionary['imag']) dictionary['Berr'] = np.array(dictionary['e_Bmag'], float) / 100. dictionary['Verr'] = np.array(dictionary['e_Vmag'], float) / 100. dictionary['gerr'] = np.array(dictionary['e_gmag'], float) / 100. dictionary['rerr'] = np.array(dictionary['e_rmag'], float) / 100. dictionary['ierr'] = np.array(dictionary['e_imag'], float) / 100. dictionary['id'] = np.array(dictionary['UCAC4'], str) for key in dictionary.keys(): if key != 'r': dictionary[key] = np.compress( (np.array(dictionary['r']) < 22) & (np.array(dictionary['r'] > 10.5)), dictionary[key]) dictionary['r'] = np.compress((np.array(dictionary['r']) < 22) & (np.array(dictionary['r'] > 10.5)), dictionary['r']) elif catalogue == 'apass': dictionary['B'] = np.array(dictionary['Bmag']) dictionary['V'] = np.array(dictionary['Vmag']) dictionary['g'] = np.array(dictionary["g'mag"]) dictionary['r'] = np.array(dictionary["r'mag"]) dictionary['i'] = np.array(dictionary["i'mag"]) dictionary['Berr'] = np.array(dictionary['e_Bmag'], float) dictionary['Verr'] = np.array(dictionary['e_Vmag'], float) dictionary['gerr'] = np.array(dictionary["e_g'mag"], float) dictionary['rerr'] = np.array(dictionary["e_r'mag"], float) dictionary['ierr'] = np.array(dictionary["e_i'mag"], float) for key in dictionary.keys(): if key != 'r': dictionary[key] = np.compress( (np.array(dictionary['r']) < 22) & (np.array(dictionary['r'] > 10.5)), dictionary[key]) dictionary['r'] = np.compress((np.array(dictionary['r']) < 22) & (np.array(dictionary['r'] > 10.5)), dictionary['r']) return dictionary
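A toy sketch (made-up magnitudes) of the catalogue trimming in vizq above: every column of the dictionary is compressed by the same brightness window on r, with r itself trimmed last so the mask stays consistent across keys.

import numpy as np

r = np.array([9.0, 15.0, 23.0, 18.0])
ids = np.array(['a', 'b', 'c', 'd'])
keep = (r < 22) & (r > 10.5)
print(np.compress(keep, ids))   # ['b' 'd']
print(np.compress(keep, r))     # [15. 18.]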
header = nc.variables['header'][:]
for icol, obstr in enumerate(nc.variables['obdata'].obinfo.split()):
    if obstr.startswith('P'):
        break
press = nc.variables['obdata'][:, icol]
for icol, obstr in enumerate(nc.variables['obdata'].obinfo.split()):
    if obstr.startswith(obtype):
        break
obs = nc.variables['obdata'][:, icol]
bufrerr = nc.variables['oberr'][:, icol]
for icol, obstr in enumerate(nc.variables['gsigesdata'].diaginfo.split()):
    if obstr.startswith(obtype):
        break
gsiges = nc.variables['gsigesdata'][:, icol]
gsianl = nc.variables['gsianldata'][:, icol]
enssprd = nc.variables['gsi_ensstd'][:, icol]
gsierr = nc.variables['gsierr'][:, icol]
used = (nc.variables['gsiqc'][:, icol]).astype('bool')

# find indices corresponding to specified obcode, pressure level
idx = np.argwhere(
    np.logical_and(header[:, 4] == obcode,
                   np.abs(level - press) <= 1.0)
).squeeze()
idx = np.compress(used[idx], idx)  # only select obs used by GSI

print 'count = ', len(idx)
print 'RMS ges departure', np.sqrt(np.mean((obs - gsiges)[idx]**2))
print 'expected ges departure', np.sqrt(np.mean((enssprd**2 + bufrerr**2)[idx]))
print 'RMS anl departure', np.sqrt(np.mean((obs - gsianl)[idx]**2))
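A toy sketch of the "only select obs used by GSI" step above: given candidate indices and a per-observation boolean usage flag, compress keeps just the indices whose flag is True.

import numpy as np

idx = np.array([2, 5, 7, 9])
used = np.zeros(12, dtype=bool)
used[[2, 9]] = True
idx_used = np.compress(used[idx], idx)
print(idx_used)        # [2 9]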