def reduction_T_3(self, I):
    A = logical_or(I[0:-1:2, :], I[1::2, :])
    A = logical_and(A[:, 0:-1:2], A[:, 1::2])
    B = logical_and(I[0:-1:2, :], I[1::2, :])
    B = logical_or(B[:, 0:-1:2], B[:, 1::2])
    C = logical_and(A, B)
    return C
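# Hedged usage sketch (hypothetical input, not from the original source): the
# method reduces a boolean image by 2x2 blocks; self is unused in the body, so
# None can be passed for illustration. logical_and/logical_or are assumed to be
# numpy's functions, imported at module level where reduction_T_3 is defined.
import numpy as np
from numpy import logical_and, logical_or

I = np.array([[1, 1, 0, 0],
              [1, 0, 0, 0],
              [1, 1, 1, 1],
              [0, 1, 1, 1]], dtype=bool)
# A 2x2 block maps to True only if each of its columns contains a True and at
# least one column is entirely True.
print(reduction_T_3(None, I))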
def _check_bounds(self, x_new, y_new):
    """Check the inputs for being in the bounds of the interpolated data.

    Args:
        x_new (float array):
        y_new (float array):

    Returns:
        out_of_bounds (Boolean array): The mask on x_new and y_new of
        values that are NOT out of bounds.
    """
    below_bounds_x = x_new < self._xlim[0]
    above_bounds_x = x_new > self._xlim[1]
    below_bounds_y = y_new < self._ylim[0]
    above_bounds_y = y_new > self._ylim[1]
    # !! Could provide more information about which values are out of bounds
    if self.bounds_error and below_bounds_x.any():
        raise ValueError("A value in x is below the interpolation range.")
    if self.bounds_error and above_bounds_x.any():
        raise ValueError("A value in x is above the interpolation range.")
    if self.bounds_error and below_bounds_y.any():
        raise ValueError("A value in y is below the interpolation range.")
    if self.bounds_error and above_bounds_y.any():
        raise ValueError("A value in y is above the interpolation range.")
    out_of_bounds = scipy.logical_not(
        scipy.logical_or(scipy.logical_or(below_bounds_x, above_bounds_x),
                         scipy.logical_or(below_bounds_y, above_bounds_y)))
    return out_of_bounds
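# Hedged usage sketch (hypothetical stand-in object, not the original class):
# the method only reads _xlim, _ylim and bounds_error, so a tiny namespace
# object is enough to show the returned in-bounds mask. scipy.logical_or and
# scipy.logical_not here rely on the old SciPy re-exports of NumPy functions.
import numpy as np
from types import SimpleNamespace

interp = SimpleNamespace(_xlim=(0.0, 1.0), _ylim=(0.0, 2.0), bounds_error=False)
x_new = np.array([-0.5, 0.5, 1.5])
y_new = np.array([0.5, 0.5, 0.5])
print(_check_bounds(interp, x_new, y_new))  # only the middle point is in bounds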
def showVectorDisplacements():
    global testImage, croppedRefImage, u, v, valid, q1, umean, vmean, x, y, sxyVar, wxyVar, goodvectorsVar
    from scipy import where, compress, logical_and, median, logical_or, nan
    from pylab import resize, transpose, quiver, title, show, find, imshow, hist, figure, clf, draw, save, load, xlabel, ylabel, flipud

    mxy = 3
    wxy = int(wxyVar.get())
    sxy = int(sxyVar.get())
    goodvectors = float(goodvectorsVar.get())
    # process to find PIV-style displacements
    x, y, u, v, q1, valid = simplepiv(croppedRefImage, testImage, wxy, mxy, sxy)
    good = where(logical_and(q1 > goodvectors, valid > 0), True, False)
    umean = median(compress(good.flat, u.flat))
    vmean = median(compress(good.flat, v.flat))
    u = where(logical_or(q1 < goodvectors, valid < 0), 0, u)
    v = where(logical_or(q1 < goodvectors, valid < 0), 0, v)
    u = u - umean
    v = v - vmean
    save('vecx.out', x)
    save('vecy.out', y)
    save('vecu.out', u)
    save('vecv.out', v)
    save('vecq1.out', q1)
    save('vecvalid.out', valid)
    u = flipud(u)
    v = -flipud(v)
    quiver(x, y, u, v)
    title('Vector displacements')
    xlabel('Pixels')
    ylabel('Pixels')
    show()
    return
def MR_boudnary_extraction(self, img=None):
    if img is None:
        img = cv2.cvtColor(camera(), cv2.COLOR_RGB2BGR)
    lab_img = self._MR_saliency__MR_readimg(img)
    mark_color = (1, 0, 0)
    labels = self._MR_saliency__MR_superpixel(lab_img)

    up_img = lab_img.copy()
    up_ids = sp.unique(labels[0, :]).astype(int)
    up_mask = sp.zeros(labels.shape).astype(bool)
    for i in up_ids:
        up_mask = sp.logical_or(up_mask, labels == i)
    up_img[up_mask] = mark_color
    up_img = mark_boundaries(up_img, labels)

    right_img = lab_img.copy()
    right_ids = sp.unique(labels[:, labels.shape[1] - 1]).astype(int)
    right_mask = sp.zeros(labels.shape).astype(bool)
    for i in right_ids:
        right_mask = sp.logical_or(right_mask, labels == i)
    right_img[right_mask] = mark_color
    right_img = mark_boundaries(right_img, labels)

    low_img = lab_img.copy()
    low_ids = sp.unique(labels[labels.shape[0] - 1, :]).astype(int)
    low_mask = sp.zeros(labels.shape).astype(bool)
    for i in low_ids:
        low_mask = sp.logical_or(low_mask, labels == i)
    low_img[low_mask] = mark_color
    low_img = mark_boundaries(low_img, labels)

    left_img = lab_img.copy()
    left_ids = sp.unique(labels[:, 0]).astype(int)
    left_mask = sp.zeros(labels.shape).astype(bool)
    for i in left_ids:
        left_mask = sp.logical_or(left_mask, labels == i)
    left_img[left_mask] = mark_color
    left_img = mark_boundaries(left_img, labels)

    plt.subplot(2, 2, 1)
    plt.axis('off')
    plt.title('up')
    plt.imshow(up_img)

    plt.subplot(2, 2, 2)
    plt.axis('off')
    plt.title('bottom')
    plt.imshow(low_img)

    plt.subplot(2, 2, 3)
    plt.axis('off')
    plt.title('left')
    plt.imshow(left_img)

    plt.subplot(2, 2, 4)
    plt.axis('off')
    plt.title('right')
    plt.imshow(right_img)

    plt.show()
def separate_cal(data, n_bins_cal, cal_mask=None):
    """Function separates data into cal_on and cal_off.

    No guarantee that the data argument remains unchanged."""

    # Allocate memory for output
    ntime, npol, nfreq = data.shape
    n_bins_after_cal = ntime // n_bins_cal
    out_data = sp.zeros((n_bins_after_cal, npol, 2, nfreq), dtype=sp.float32)

    # Get the phase offset of the cal.
    try:
        if cal_mask is None:
            first_on, n_blank = get_cal_mask(data, n_bins_cal)
        else:
            first_on, n_blank = cal_mask
    except ce.DataError:
        print "Discarded record due to bad profile. "
        out_data[:] = float('nan')
    else:
        # How many samples for each cal state.
        n_cal_state = n_bins_cal // 2 - n_blank
        first_off = (first_on + n_bins_cal // 2) % n_bins_cal

        # Reshape data to add an index to average over.
        data.shape = (n_bins_after_cal, n_bins_cal) + data.shape[1:]

        # Get the masks for the on and off data.
        inds = sp.arange(n_bins_cal)
        if first_on == min((sp.arange(n_cal_state) + first_on) % n_bins_cal):
            on_mask = sp.logical_and(inds >= first_on,
                                     inds < first_on + n_cal_state)
        else:
            on_mask = sp.logical_or(inds >= first_on,
                                    inds < (first_on + n_cal_state) % n_bins_cal)
        if first_off == min((sp.arange(n_cal_state) + first_off) % n_bins_cal):
            off_mask = sp.logical_and(inds >= first_off,
                                      inds < first_off + n_cal_state)
        else:
            off_mask = sp.logical_or(inds >= first_off,
                                     inds < (first_off + n_cal_state) % n_bins_cal)

        # Find cal on and cal off averages.  Always use mean not median due to
        # discretization noise.
        # This loop is much faster than the built in numpy mean() for some
        # reason.
        for ii in range(n_bins_cal):
            if on_mask[ii]:
                out_data[:, :, 0, :] += data[:, ii, :, :]
            elif off_mask[ii]:
                out_data[:, :, 1, :] += data[:, ii, :, :]
        out_data[:, :, 0, :] /= sp.sum(on_mask)
        out_data[:, :, 1, :] /= sp.sum(off_mask)

    return out_data
def velocity_dof(domain, ax):
    # Calculate velocity dof numbers for each cell
    rm = roll(domain, 1, axis=ax)
    type_3 = logical_and(domain, rm)
    type_2 = logical_or(domain, rm)
    dof = cumsum(logical_not(logical_or(type_3, type_2))).reshape(domain.shape) - 1
    # Do logic to figure out type 2 and 3
    dof[type_2 == 1] = -2
    dof[type_3 == 1] = -3
    return dof.astype(int64)
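# Hedged usage sketch (hypothetical domain, not from the original source):
# number the free velocity DOFs of a small 1-by-6 domain along axis 1, with
# cells flagged type 2 / type 3 marked as -2 / -3. The function body assumes
# roll, logical_and, logical_or, logical_not, cumsum and int64 are available at
# module level (e.g. via "from numpy import *").
import numpy as np

domain = np.array([[0, 0, 1, 1, 0, 0]])  # 1 marks occupied cells
print(velocity_dof(domain, ax=1))        # expected: [[ 0  1 -2 -3 -2  2]]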
def triangulate_debug(canvas, options=None):
    print("### Starting interpolation in debug mode ###")
    # Depending on whether we are in GUI mode or not, we will use the
    # matplotlib.pyplot library directly, or through the DebugWindow interface
    # (defined below), which wraps the graphs in a tkinter window. This is
    # necessary as tkinter doesn't play well with matplotlib windows and
    # plt.show() would not stop execution until the main Magic2 window is
    # closed (and that's not very useful). By redefining plt we are able to
    # leave the code in this function alone, so that it still works in
    # headless mode (with main_old.py)
    global plt
    if options is not None:
        plt = DebugWindow(options)
    tri = Triangulation(sp.transpose(sp.nonzero(canvas.fringes_image_clean)), canvas)
    plt.imshow(canvas.fringes_image_clean, cmap=m2graphics.cmap)
    # Plot the triangulation. Flat triangles will be green
    plt.triplot(tri.points[:, 1], tri.points[:, 0], tri.get_simplices())
    plt.triplot(tri.points[:, 1], tri.points[:, 0],
                [tri.triangles[i].vertices for i in tri.flat_triangles])
    plt.show()
    # plt.triplot(tri.points[:, 1], tri.points[:, 0], tri.dt.simplices)
    print("Optimisation")
    tri.optimise()
    print("Finished")
    # Plot the optimised triangulation. Flat triangles will be green
    plt.imshow(canvas.fringe_phases, cmap=m2graphics.cmap)
    # print(tri.flat_triangles)
    # print(added_points)
    # plt.triplot(tri.points[:, 1], tri.points[:, 0], [tri.triangles[i].vertices for i in tri.flat_triangles])
    plt.triplot(tri.points[:, 1], tri.points[:, 0], tri.get_simplices())
    plt.triplot(tri.points[:, 1], tri.points[:, 0],
                [tri.triangles[i].vertices for i in tri.flat_triangles])
    plt.show()
    # Perform interpolation
    print("Interpolating")
    tri.interpolate(canvas)
    plt.imshow(sp.ma.masked_where(
        sp.logical_or(canvas.mask == False, canvas.interpolated == -1024.0),
        canvas.interpolated), cmap=m2graphics.cmap)
    plt.show()
    plt.imshow(sp.ma.masked_where(
        sp.logical_or(canvas.mask == False, canvas.interpolated == -1024.0),
        canvas.interpolated), cmap=m2graphics.cmap)
    plt.triplot(tri.points[:, 1], tri.points[:, 0], tri.get_simplices())
    plt.show()
def _check_bounds(self, x_new):
    # If self.bounds_error = 1, we raise an error if any x_new values
    # fall outside the range of x. Otherwise, we return an array indicating
    # which values are outside the boundary region.
    # !! Needs some work for multi-dimensional x !!
    below_bounds = less(x_new, self.x[0])
    above_bounds = greater(x_new, self.x[-1])
    # Note: sometrue has been redefined to handle length 0 arrays
    # !! Could provide more information about which values are out of bounds
    # RHC -- Changed these ValueErrors to PyDSTool_BoundsErrors
    if self.bounds_error and any(sometrue(below_bounds)):
##        print "Input:", x_new
##        print "Bound:", self.x[0]
##        print "Difference input - bound:", x_new-self.x[0]
        raise PyDSTool_BoundsError, " A value in x_new is below the"\
              " interpolation range."
    if self.bounds_error and any(sometrue(above_bounds)):
##        print "Input:", x_new
##        print "Bound:", self.x[-1]
##        print "Difference input - bound:", x_new-self.x[-1]
        raise PyDSTool_BoundsError, " A value in x_new is above the"\
              " interpolation range."
    # !! Should we emit a warning if some values are out of bounds.
    # !! matlab does not.
    out_of_bounds = logical_or(below_bounds, above_bounds)
    return out_of_bounds
def normMn(n, vals, x1, x2, ymin, yatx):
    output = scipy.zeros(vals.shape)
    avg = (x1 + x2) / 2
    diff = abs(x2 - x1)
    output = pow(2, n) * (yatx - ymin) * pow((vals - avg) / diff, n) + ymin
    output[scipy.logical_or(vals < x1, vals > x2)] = 0.
    return output
def normM2(vals, x1, x2, ymin, yatx):
    output = scipy.zeros(vals.shape)
    #temp = (x1+x2)/(2*x1*x2)
    F = 4 * (ymin - yatx) / (x2 - x1) / (x1 - x2)
    output = F * (vals - x1) * (vals - x2) + yatx
    output[scipy.logical_or(vals < x1, vals > x2)] = 0.
    return output
def have_same_subd_decomp(dds0, dds1):
    """
    Returns :samp:`True` if pairs of non-halo-sub-domains on all processes
    have the same (global) non-halo-sub-domain origin index and same
    non-halo-sub-domain shape.
    Note: performs an MPI *allreduce* operation.

    :type dds0: :obj:`mango.Dds`
    :param dds0: Array.
    :type dds1: :obj:`mango.Dds`
    :param dds1: Array.
    :rtype: :obj:`bool`
    :return: :samp:`True` if MPI non-halo-subdomain decomposition is the same
       for :samp:`dds0` and :samp:`dds1`.
    """
    numDiff = 0
    if (sp.any(sp.logical_or(dds0.subd.origin != dds1.subd.origin,
                             (dds0.subd.shape != dds1.subd.shape)))):
        numDiff = 1
    mpiComm = None
    if (hasattr(dds0, "mpi") and hasattr(dds0.mpi, "comm")
            and (dds0.mpi.comm is not None)):
        mpiComm = dds0.mpi.comm
        numDiff = mpiComm.allreduce(numDiff, op=mango.mpi.SUM)
    return (numDiff == 0)
def updateTime(val):
    psi.clear()
    t_idx = int(timeSlider.val)
    psi.set_xlim([0.5, 1.2])
    psi.set_ylim([-0.8, 0.8])
    title.set_text('LIUQE Reconstruction, $t = %(t).2f$ s' % {'t': t[t_idx]})
    psi.set_xlabel('$R$ [m]')
    psi.set_ylabel('$Z$ [m]')
    if macx is not None:
        psi.plot(macx, macy, 'k', linewidth=3, zorder=5)
    elif limx is not None:
        psi.plot(limx, limy, 'k', linewidth=3, zorder=5)
    # catch NaNs separating disjoint sections of R,ZLCFS in mask
    maskarr = scipy.where(scipy.logical_or(RLCFS[t_idx] > 0.0,
                                           scipy.isnan(RLCFS[t_idx])))
    RLCFSframe = RLCFS[t_idx, maskarr[0]]
    ZLCFSframe = ZLCFS[t_idx, maskarr[0]]
    psi.plot(RLCFSframe, ZLCFSframe, 'r', linewidth=3, zorder=3)
    if fill:
        psi.contourf(rGrid, zGrid, psiRZ[t_idx], 50, zorder=2)
        psi.contour(rGrid, zGrid, psiRZ[t_idx], 50, colors='k',
                    linestyles='solid', zorder=3)
    else:
        psi.contour(rGrid, zGrid, psiRZ[t_idx], 50, colors='k')
    if mask:
        patchdraw = psi.add_patch(patch)
        patchdraw.set_zorder(4)
    psi.add_patch(tilesP)
    psi.add_patch(vesselP)
    psi.set_xlim([0.5, 1.2])
    psi.set_ylim([-0.8, 0.8])
    fluxPlot.canvas.draw()
def showContours_v():
    global umean, vmean, modelgreylevelVar, goodvectorsVar, lowerLevelVar, upperLevelVar, tickvar, numcontourVar
    from scipy import zeros, where, logical_or, r_, argmin, shape, ravel, nan, compress, flipud
    from pylab import imshow, clf, title, save, load, figure, contourf, cm, hold, contour, xlabel, ylabel

    x = load('vecx.out.npy')
    y = load('vecy.out.npy')
    u = load('vecu.out.npy')
    v = load('vecv.out.npy')
    q1 = load('vecq1.out.npy')
    valid = load('vecvalid.out.npy')
    modelgreylevel = float(modelgreylevelVar.get())
    goodvectors = float(goodvectorsVar.get())
    v = where(logical_or(q1 < goodvectors, valid < 0), modelgreylevel, v)
    v = -flipud(v)
    lowerLevel = float(lowerLevelVar.get())
    upperLevel = float(upperLevelVar.get())
    numcontour = int(numcontourVar.get())
    tick = float(upperLevel - lowerLevel) / numcontour
    vv = r_[lowerLevel:upperLevel:tick]
    figure()
    contourf(x, y, v, vv, cmap=cm.gray)
    contourf(x, y, v, vv, cmap=cm.gray)
    xlabel('Pixels')
    ylabel('Pixels')
    return
def normMn2(n, vals, x1, x2, ymin):
    output = scipy.zeros(vals.shape)
    avg = (x1 + x2) / 2
    diff = abs(x2 - x1)
    output = pow(2, n) * (1 - ymin) * pow((vals - avg) / diff, n) + ymin
    output[scipy.logical_or(vals < x1, vals > x2)] = 0.
    output /= scipy.sum(output)
    return output
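# Hedged usage sketch (hypothetical parameters, not from the original sources):
# evaluate the polynomial window profiles above on a grid. Values outside
# [x1, x2] are zeroed via scipy.logical_or, and normMn2 additionally divides by
# the total so the result sums to one. Note these snippets call scipy.zeros /
# scipy.logical_or / scipy.sum, i.e. old SciPy re-exports of NumPy functions;
# on recent SciPy you would substitute numpy for scipy.
import numpy as np

vals = np.linspace(-1.0, 3.0, 9)
w = normMn(2, vals, x1=0.0, x2=2.0, ymin=0.1, yatx=1.0)  # profile, zero outside [x1, x2]
w2 = normMn2(2, vals, x1=0.0, x2=2.0, ymin=0.1)          # normalised variant
print(w2.sum())  # normMn2 divides by the total, so this is 1.0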
def despike(samplein):
    '''Replace spikes with the median of the surrounding 3 values if they
    exceed that median by more than 50% or fall below two thirds of it.'''
    sample = samplein.copy()
    med = ndimage.median_filter(sample, size=3, mode='mirror')
    mask = sp.logical_or(sample / med > 1.5, sample / med < 0.66)
    sample[mask] = med[mask]
    return sample
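# Hedged usage sketch (hypothetical data, not from the original source): despike
# assumes sp is the scipy module (older SciPy re-exports numpy's logical_or) and
# ndimage is scipy.ndimage, as in the snippet above.
import numpy as np

signal = np.array([1.0, 1.1, 1.0, 9.0, 1.2, 1.1, 1.0])  # 9.0 is an isolated spike
print(despike(signal))  # the spike is replaced by the local 3-point median (1.2)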
def _detectOutliers(self):
    """ Binary vector for which dimension saw an outlier gradient:
    considers outlier curvature as well. """
    hs = mean(self._last_diaghessians, axis=0)
    var = (self._vhbar - self._hbar**2) / self.batch_size
    res = logical_or(vSGD._detectOutliers(self),
                     (hs - self._hbar)**2 > self.outlier_level**2 * var)
    del hs, var
    return res
def get_data(self, col):
    if self.task == 'HSmice_data_REVISIONS':
        self.trait = self.measures[col]
        self.pheno = self.all_pheno[:, col]
        covs2use = self.all_covs2use[col].split(',')
        Ic = sp.zeros(self.measures.shape[0], dtype=bool)
        for cov in covs2use:
            Ic = sp.logical_or(Ic, self.measures == cov)
        self.covs = self.all_covs[:, Ic]
    elif self.task == 'HSmice_hip_REVISIONS':
        self.trait = self.measures[col]
        self.pheno = self.all_pheno[:, col]
        covs2use = ['GENDER', 'group_size']
        Ic = sp.zeros(self.measures.shape[0], dtype=bool)
        for cov in covs2use:
            Ic = sp.logical_or(Ic, self.measures == cov)
        self.covs = self.all_covs[:, Ic]
    elif 'HSmice_simulations' in self.task:
        self.trait = self.measures[col]
        self.pheno = self.all_pheno[:, col]
        self.covs = None
    else:
        print "Nothing done: task unknown!"
    return {
        'trait': self.trait,
        'pheno': self.pheno,
        'pheno_ID': self.pheno_ID,
        'covs': self.covs,
        'covs_ID': self.covs_ID,
        'kinship_type': self.kinship_type,
        'kinship_full': self.kinship_full,
        'kinship_full_ID': self.kinship_full_ID,
        'cage_full': self.cage_full,
        'cage_full_ID': self.cage_full_ID,
        'subset_IDs': self.subset_IDs
    }
def idxLineOrCurve(self, contour, corners):
    threshDist = 3
    isLinePt = np.zeros([1, len(contour)], dtype=bool)
    for i in range(0, 2):
        point1 = corners[2 * i, :]
        point2 = corners[2 * i + 1, :]
        kLine = point2 - point1
        kLineNorm = kLine / np.linalg.norm(kLine)
        normVector = np.asarray([-kLineNorm[1], kLineNorm[0]])
        distance = abs((contour - point1).dot(normVector))
        isInlierLine = distance <= threshDist
        isLinePt = scipy.logical_or(isLinePt, isInlierLine)
    linePtsIdx = np.where(isLinePt == True)[1]
    curvePtsIdx = np.where(isLinePt == False)[1]
    return linePtsIdx, curvePtsIdx
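# Hedged usage sketch (hypothetical data, not from the original source): split a
# contour into points lying within threshDist of either of two boundary lines
# (each given by a pair of corners) and the remaining "curve" points. self is
# unused by the method, so None is passed; scipy.logical_or relies on the old
# SciPy re-export of numpy's logical_or.
import numpy as np

corners = np.array([[0., 0.], [10., 0.],     # first boundary line (y = 0)
                    [0., 10.], [10., 10.]])  # second boundary line (y = 10)
contour = np.array([[1., 0.], [5., 1.], [5., 5.], [3., 10.], [7., 9.]])
line_idx, curve_idx = idxLineOrCurve(None, contour, corners)
print(line_idx, curve_idx)  # points 0, 1, 3, 4 lie near a line; point 2 does not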
def __init__(self, filename, fi=None, m=None, imshow=None): # An image is loaded, and only its first colour component is taken # out of red, green, blue, alpha. # The .png images supplied are greyscale. self.error = False try: if filename != 'dump' and fi is None: # The image may have 3-4 channels if its RGB(A), we only # need one of them try: image = plt.imread(filename.name)[:, :, 0] # If the image is greyscale, just take the whole thing except IndexError: image = plt.imread(filename.name) # Fringes are black, extract them from the image self.fringes_image = image == 0 # This is the user defined mask, it was grey (so neither black # nor white, which is the condition we're using here) self.mask = sp.logical_or(image == 1, self.fringes_image) else: # This uses a provide mask and image (for example from an .m2 file) self.fringes_image = fi self.mask = m # This will store only the labelled fringes, currently empty self.fringes_image_clean = sp.zeros_like(self.fringes_image)-0 # -1024 indicates an area where there is no data # Visual stores the fringe phases, but allows for width, making # the fringes easier to display self.fringe_phases_visual = sp.zeros_like(self.fringes_image)-1024 # In fringe_phases all the fringes have their initial width self.fringe_phases = sp.zeros_like(self.fringes_image)-1024 # Indexing starts at 0, so -1 is a good choice for 'not an index' self.fringe_indices = sp.zeros_like(self.fringes_image)-1 # x and y are used during interpolation processes to make # calculations easier, they store the x and y position of # every pixel self.x, self.y = sp.meshgrid(sp.arange(0, len(self.fringes_image[0])), sp.arange(0, len(self.fringes_image))) self.xy = sp.transpose([self.y.ravel(), self.x.ravel()]) # Interpolated will store the interpolated version of the image self.interpolation_done = False self.interpolated = sp.zeros_like(self.fringes_image)-1024.0 # this parameter will store the object returned by matplotlib's # imshow function, making it easy to change the data being displayed self.imshow = imshow except OSError: self.error = True
def run(self, frame): self.prev_frame = self.frame self.frame = frame from copy import copy self.orientation = copy(self.frame) self.histogram_equalize() self.dist(self.frame, self.prev_frame) self.edge_frame = self.canny(frame) self.hand_colored_frame = self.hand_color_detect(frame) self.remove_background() self.canny(self.frame) # self.res = scipy.logical_and(self.hand_colored_frame, self.edge_frame) # self.res = scipy.logical_and(self.remove_background_frame, self.res) # self.res = scipy.logical_and(self.hand_colored_frame, self.remove_background_frame) self.res = self.remove_background_frame self.res = scipy.logical_or(self.res, self.frame_edge()) # self.res = self.remove_background_frame self.res = 255 * self.res self.res = self.res.astype(np.uint8) # self.res = cv2.GaussianBlur(self.res, (5, 5), 0) # self.res = cv2.erode(self.res, None, iterations=1) # self.res = cv2.dilate(self.res, None, iterations=1) # self.res = cv2.erode(self.res, None, iterations=2) self.orientation_line() line, m = self.base_line_func() rows, cols = self.res.shape angle = math.degrees(math.atan(m))*-1 rotateMatrixM = cv2.getRotationMatrix2D((cols/2, rows/2), angle, 1) dst = cv2.warpAffine(self.res,rotateMatrixM,(cols,rows)) # color_dst = cv2.warpAffine(self.frame, rotateMatrixM, (cols, rows)) dst = cv2.GaussianBlur(dst, (5, 5), 7) for y in xrange(dst.shape[0]): for x in xrange(dst.shape[1]): if dst[y, x] > 90: dst[y, x] = 255 else: dst[y, x] = 0 dst = cv2.erode(dst, None, iterations=2) dst = cv2.dilate(dst, None, iterations=1) # now f*****g count fingers! cr, rr = dst.shape cr, rr = int(cr*1.15), int(rr*1.15) self.res = cv2.resize(dst, (rr, cr))
def log_density(self, x, rdn_meas, geom, bounds):
    """Log probability density combines prior and likelihood terms"""

    # First check bounds
    if bounds is not None and any(s.logical_or(x < bounds[0], x > bounds[1])):
        return -s.Inf

    # Prior term
    Sa = self.fm.Sa(x, geom)
    xa = self.fm.xa(x, geom)
    pa = self.stable_mvnpdf(xa, Sa, x)

    # Data likelihood term
    Seps = self.fm.Seps(x, rdn_meas, geom)
    Seps_win = s.array([Seps[i, self.winidx] for i in self.winidx])
    rdn_est = self.fm.calc_rdn(x, geom, rfl=None, Ls=None)
    pm = self.stable_mvnpdf(rdn_est[self.winidx], Seps_win,
                            rdn_meas[self.winidx])

    return pa + pm
def __read_geometry(self): """reads in bmp and generates contacts and other objects of interest""" from PIL import Image from scipy import where, asarray, array, transpose, logical_or, logical_and, cos, pi from aux import Contact img = Image.open(self.atlas) arr = asarray(img) self.p.canvas = arr self.p.Boffset = arr.shape[0] / 10 self.p.Bdummy = arr.shape[0] / 5 self.p.raw_coords = logical_and(arr > 0, arr % 5 == 0).nonzero() self.p.tuple_canvas_coordinates = tuple(zip(*self.p.raw_coords)) self.p.El = 2 * self.p.t0 * (1 - cos(pi / (self.p.canvas.shape[1] - 2))) contacts = [] shades = [(103, 115), (133, 145), (163, 175), (193, 205)] contact_index = 0 for shade in shades: contact_temp_tuple_coords = tuple(zip(*where(arr == shade[1]))) interface_temp_tuple_coords = tuple( zip(*logical_or(arr == shade[0], arr == shade[1]).nonzero())) a = Contact(contact_temp_tuple_coords, interface_temp_tuple_coords) if a.length == 0: continue a.index = contact_index # try: # if any(arr[a.interface_raw_coordinates[0][0]+1,:]==239): # a.SO = True # else: # a.SO = False # except IndexError: # if any(arr[a.interface_raw_coordinates[0][0]-1,:] == 239): # a.SO = True # else: # a.SO = False contacts.append(a) contact_index += 1 if len(contacts) == 0: print '--------------------------------------' print 'No contacts found! This will not work!' print '--------------------------------------' # from pudb import set_trace; set_trace() self.p.contacts = contacts
def score_accuracy(y_predicted, y_real):
    """
    score = (y_real AND y_predicted)/(y_real OR y_predicted)
    @param y_predicted:
    @param y_real:
    @return: score
    """
    if isinstance(y_predicted, np.ndarray):
        y_predicted = y_predicted.ravel()
    else:
        y_predicted = y_predicted.toarray().ravel()
    y_real = y_real.toarray().ravel()
    numerator = np.sum(1. * (logical_and(y_real, y_predicted)))
    denominator = np.sum(1. * (logical_or(y_real, y_predicted)))
    return numerator / denominator
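# Hedged usage sketch (hypothetical data, not from the original source): this
# score is the Jaccard index of the two binary label vectors. y_real is assumed
# to be a sparse matrix (hence .toarray()), and logical_and/logical_or are
# assumed to be imported from numpy where score_accuracy is defined.
import numpy as np
from scipy import sparse

y_pred = np.array([1, 0, 1, 1, 0])
y_true = sparse.csr_matrix(np.array([[1, 0, 0, 1, 0]]))
print(score_accuracy(y_pred, y_true))  # intersection 2 / union 3 -> 0.666...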
def updateTime(val): # clear the figure # enPlot.clear() psi.clear() # check the timining on the slider t_idx = int(timeSlider.val) # redo the plot for the psi # first find the closest index to the t_idx indPsi = numpy.argmin(numpy.abs(eq.getTimeBase() - times[t_idx])) psi.set_xlim([0.5, 1.2]) psi.set_ylim([-0.8, 0.8]) psi.set_title('LIUQE Reconstruction, $t = %(t).2f$ s' % {'t': eq.getTimeBase()[indPsi]}) psi.set_xlabel('$R$ [m]') psi.set_ylabel('$Z$ [m]') # # catch NaNs separating disjoint sections of R,ZLCFS in mask maskarr = scipy.where( scipy.logical_or(RLCFS[indPsi] > 0.0, scipy.isnan(RLCFS[indPsi]))) RLCFSframe = RLCFS[indPsi, maskarr[0]] ZLCFSframe = ZLCFS[indPsi, maskarr[0]] psi.plot(RLCFSframe, ZLCFSframe, 'r', lw=3, zorder=3) psi.contour(rGrid, zGrid, psiRZ[indPsi], 50, colors='k') psi.add_patch(tilesP) psi.add_patch(vesselP) psi.set_xlim([0.5, 1.2]) psi.set_ylim([-0.8, 0.8]) # redo the plot for the density # enPlot.plot(enAvg[enAvg.dims[0]],enAvg.values/1e19, 'b') # enPlot.set_ylabel(r'n$_e$ [10$^{19}$ fringes]', fontsize = 16) # # add a dashed line with the first time of thomson data chosen # enPlot.axvline(times[t_idx], linestyle ='--', color = 'orange') axv.set_xdata(times[t_idx]) # redo the plot for the profile rho = eq.rz2psinorm(rPos, zPos, times[t_idx], sqrt=True) pr.set_xdata(rho[dataEn[:, t_idx] != -1]) pr.set_ydata(dataEn[dataEn[:, t_idx] != -1, t_idx] / 1e19) thPlot.set_ylim([0, dataEn[:, t_idx].max() / 1e19]) mainPlot.canvas.draw_idle()
def coordinate_datasets(reference_genotype_file, hdf5_file, summary_dict, validation_genotype_file=None, genetic_map_dir=None, min_maf=0.01, skip_coordination=False, max_freq_discrep = 0.15, debug=False): summary_dict[3.9]={'name':'dash', 'value':'Coordination'} t0 = time.time() if validation_genotype_file is not None: print('Coordinating datasets (Summary statistics, LD reference genotypes, and Validation genotypes).') else: print('Coordinating datasets (Summary statistics and LD reference genotypes).') plinkf = plinkfile.PlinkFile(reference_genotype_file) # Figure out chromosomes and positions. if debug: print('Parsing plinkf_dict_val reference genotypes') loci = plinkf.get_loci() plinkf.close() summary_dict[4]={'name':'Num individuals in LD Reference data:','value':plinkfiles.get_num_indivs(reference_genotype_file)} summary_dict[4.1]={'name':'SNPs in LD Reference data:','value':len(loci)} gf_chromosomes = [l.chromosome for l in loci] chromosomes = sp.unique(gf_chromosomes) chromosomes.sort() chr_dict = plinkfiles.get_chrom_dict(loci, chromosomes) if validation_genotype_file is not None: if debug: print('Parsing LD validation bim file') plinkf_val = plinkfile.PlinkFile(validation_genotype_file) # Loads only the individuals... plinkf_dict_val = plinkfiles.get_phenotypes(plinkf_val) loci_val = plinkf_val.get_loci() plinkf_val.close() summary_dict[5]={'name':'SNPs in Validation data:','value':len(loci_val)} chr_dict_val = plinkfiles.get_chrom_dict(loci_val, chromosomes) # Open HDF5 file and prepare out data assert not 'iids' in hdf5_file, 'Something is wrong with the HDF5 file, no individuals IDs were found.' if plinkf_dict_val['has_phenotype']: hdf5_file.create_dataset('y', data=plinkf_dict_val['phenotypes']) summary_dict[6]={'name':'Num validation phenotypes:','value':plinkf_dict_val['num_individs']} hdf5_file.create_dataset('fids', data=sp.array(plinkf_dict_val['fids'], dtype=util.fids_dtype)) hdf5_file.create_dataset('iids', data=sp.array(plinkf_dict_val['iids'], dtype=util.iids_dtype)) maf_adj_risk_scores = sp.zeros(plinkf_dict_val['num_individs']) # Now summary statistics ssf = hdf5_file['sum_stats'] cord_data_g = hdf5_file.create_group('cord_data') num_common_snps = 0 # corr_list = [] chromosomes_found = set() num_snps_common_before_filtering =0 num_snps_common_after_filtering =0 tot_num_non_matching_nts = 0 tot_num_non_supported_nts = 0 tot_num_ambig_nts = 0 tot_num_freq_discrep_filtered_snps = 0 tot_num_maf_filtered_snps = 0 tot_g_ss_nt_concord_count = 0 if validation_genotype_file is not None: tot_g_vg_nt_concord_count = 0 tot_vg_ss_nt_concord_count = 0 # Now iterate over chromosomes chrom_i = 0 for chrom in chromosomes: chrom_i +=1 if not debug: sys.stdout.write('\r%0.2f%%' % (100.0 * (float(chrom_i) / (len(chromosomes)+1)))) sys.stdout.flush() try: chr_str = 'chrom_%d' % chrom ssg = ssf[chr_str] except Exception as err_str: print(err_str) print('Did not find chromosome %d in SS dataset.'%chrom) print('Continuing.') continue if debug: print('Coordinating data for chromosome %s' % chr_str) chromosomes_found.add(chrom) #Get summary statistics chromosome group ssg = ssf['chrom_%d' % chrom] ss_sids = (ssg['sids'][...]).astype(util.sids_u_dtype) if validation_genotype_file is not None: chrom_d_val = chr_dict_val[chr_str] vg_sids = chrom_d_val['sids'] common_sids = sp.intersect1d(ss_sids, vg_sids) # A map from sid to index for validation data vg_sid_dict = {} for i, sid in enumerate(vg_sids): vg_sid_dict[sid] = i else: common_sids = ss_sids # A map from sid to index for summary stats 
ss_sid_dict = {} for i, sid in enumerate(ss_sids): ss_sid_dict[sid] = i #The indices to retain for the LD reference genotypes chrom_d = chr_dict[chr_str] g_sids = chrom_d['sids'] common_sids = sp.intersect1d(common_sids, g_sids) # A map from sid to index for LD reference data g_sid_dict = {} for i, sid in enumerate(g_sids): g_sid_dict[sid] = i if debug: print('Found %d SNPs on chrom %d that were common across all datasets' % (len(common_sids), chrom)) print('Ordering SNPs by genomic positions (based on LD reference genotypes).') g_snp_map = [] for sid in common_sids: g_snp_map.append(g_sid_dict[sid]) # order by positions (based on LD reference file) g_positions = sp.array(chrom_d['positions'])[g_snp_map] order = sp.argsort(g_positions) g_snp_map = sp.array(g_snp_map)[order] g_snp_map = g_snp_map.tolist() common_sids = sp.array(common_sids)[order] # Get the ordered sum stats SNPs indices. ss_snp_map = [] for sid in common_sids: ss_snp_map.append(ss_sid_dict[sid]) # Get the ordered validation SNPs indices if validation_genotype_file is not None: vg_snp_map = [] for sid in common_sids: vg_snp_map.append(vg_sid_dict[sid]) vg_nts = sp.array(chrom_d_val['nts']) vg_nts_ok = sp.array(vg_nts)[vg_snp_map] g_nts = sp.array(chrom_d['nts']) ss_nts = (ssg['nts'][...]).astype(util.nts_u_dtype) betas = ssg['betas'][...] log_odds = ssg['log_odds'][...] if 'freqs' in ssg: ss_freqs = ssg['freqs'][...] g_ss_nt_concord_count = sp.sum( g_nts[g_snp_map] == ss_nts[ss_snp_map]) / 2.0 if validation_genotype_file is not None: vg_ss_nt_concord_count = sp.sum(vg_nts_ok == ss_nts[ss_snp_map]) / 2.0 g_vg_nt_concord_count = sp.sum(g_nts[g_snp_map] == vg_nts_ok) / 2.0 if debug: print('Nucleotide concordance counts out of %d genotypes, vg-rg: %d ; vg-ss: %d' % (len(g_snp_map), g_vg_nt_concord_count, vg_ss_nt_concord_count)) tot_vg_ss_nt_concord_count += vg_ss_nt_concord_count tot_g_vg_nt_concord_count += g_vg_nt_concord_count tot_g_ss_nt_concord_count += g_ss_nt_concord_count if debug: print('Nucleotide concordance counts out of %d genotypes, rg-ss: %d' % (len(g_snp_map), g_ss_nt_concord_count)) num_freq_discrep_filtered_snps = 0 num_non_matching_nts = 0 num_non_supported_nts = 0 num_ambig_nts = 0 # Identifying which SNPs have nucleotides that are ok.. ok_nts = [] ok_indices = {'g': [], 'ss': []} if validation_genotype_file is not None: ok_indices['vg']=[] #Now loop over SNPs to coordinate nucleotides. if validation_genotype_file is not None: for g_i, vg_i, ss_i in zip(g_snp_map, vg_snp_map, ss_snp_map): # To make sure, is the SNP id the same? assert g_sids[g_i] == vg_sids[vg_i] == ss_sids[ss_i], 'Some issues with coordinating the genotypes.' g_nt = g_nts[g_i] if not skip_coordination: vg_nt = vg_nts[vg_i] ss_nt = ss_nts[ss_i] # Is the nucleotide ambiguous. g_nt = [g_nts[g_i][0], g_nts[g_i][1]] if tuple(g_nt) in util.ambig_nts: num_ambig_nts += 1 continue # First check if nucleotide is sane? if (not g_nt[0] in util.valid_nts) or (not g_nt[1] in util.valid_nts): num_non_supported_nts += 1 continue os_g_nt = sp.array( [util.opp_strand_dict[g_nt[0]], util.opp_strand_dict[g_nt[1]]]) flip_nts = False #Coordination is a bit more complicate when validation genotypes are provided.. 
if not ((sp.all(g_nt == ss_nt) or sp.all(os_g_nt == ss_nt)) and (sp.all(g_nt == vg_nt) or sp.all(os_g_nt == vg_nt))): if sp.all(g_nt == vg_nt) or sp.all(os_g_nt == vg_nt): flip_nts = (g_nt[1] == ss_nt[0] and g_nt[0] == ss_nt[1]) or ( os_g_nt[1] == ss_nt[0] and os_g_nt[0] == ss_nt[1]) # Try flipping the SS nt if flip_nts: betas[ss_i] = -betas[ss_i] log_odds[ss_i] = -log_odds[ss_i] if 'freqs' in ssg: ss_freqs[ss_i] = 1 - ss_freqs[ss_i] else: if debug: print("Nucleotides don't match after all?: g_sid=%s, ss_sid=%s, g_i=%d, ss_i=%d, g_nt=%s, ss_nt=%s" % \ (g_sids[g_i], ss_sids[ss_i], g_i, ss_i, str(g_nt), str(ss_nt))) num_non_matching_nts += 1 continue else: num_non_matching_nts += 1 continue # Opposite strand nucleotides # everything seems ok. ok_indices['g'].append(g_i) ok_indices['vg'].append(vg_i) ok_indices['ss'].append(ss_i) ok_nts.append(g_nt) else: for g_i, ss_i in zip(g_snp_map, ss_snp_map): # To make sure, is the SNP id the same? assert g_sids[g_i] == ss_sids[ss_i], 'Some issues with coordinating the genotypes.' g_nt = g_nts[g_i] if not skip_coordination: ss_nt = ss_nts[ss_i] # Is the nucleotide ambiguous. g_nt = [g_nts[g_i][0], g_nts[g_i][1]] if tuple(g_nt) in util.ambig_nts: num_ambig_nts += 1 continue # First check if nucleotide is sane? if (not g_nt[0] in util.valid_nts) or (not g_nt[1] in util.valid_nts): num_non_matching_nts += 1 continue os_g_nt = sp.array( [util.opp_strand_dict[g_nt[0]], util.opp_strand_dict[g_nt[1]]]) flip_nts = False #Coordination is a bit more complicate when validation genotypes are provided.. if not sp.all(g_nt == ss_nt) or sp.all(os_g_nt == ss_nt): flip_nts = (g_nt[1] == ss_nt[0] and g_nt[0] == ss_nt[1]) or ( os_g_nt[1] == ss_nt[0] and os_g_nt[0] == ss_nt[1]) # Try flipping the SS nt if flip_nts: betas[ss_i] = -betas[ss_i] log_odds[ss_i] = -log_odds[ss_i] if 'freqs' in ssg and ss_freqs[ss_i]>0: ss_freqs[ss_i] = 1.0 - ss_freqs[ss_i] else: if debug: print("Nucleotides don't match after all?: g_sid=%s, ss_sid=%s, g_i=%d, ss_i=%d, g_nt=%s, ss_nt=%s" % \ (g_sids[g_i], ss_sids[ss_i], g_i, ss_i, str(g_nt), str(ss_nt))) num_non_matching_nts += 1 continue # everything seems ok. ok_indices['g'].append(g_i) ok_indices['ss'].append(ss_i) ok_nts.append(g_nt) if debug: print('%d SNPs had ambiguous nucleotides.' % num_ambig_nts) print('%d SNPs were excluded due to nucleotide issues.' % num_non_matching_nts) # Resorting by position positions = sp.array(chrom_d['positions'])[ok_indices['g']] # Now parse SNPs .. snp_indices = sp.array(chrom_d['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices = snp_indices[ok_indices['g']] raw_snps, freqs = plinkfiles.parse_plink_snps( reference_genotype_file, snp_indices) snp_stds = sp.sqrt(2 * freqs * (1 - freqs)) snp_means = freqs * 2 betas = betas[ok_indices['ss']] log_odds = log_odds[ok_indices['ss']] ns = ssg['ns'][...][ok_indices['ss']] ps = ssg['ps'][...][ok_indices['ss']] nts = sp.array(ok_nts) sids = (ssg['sids'][...]).astype(util.sids_u_dtype) sids = sids[ok_indices['ss']] #Parse validation genotypes, if available if validation_genotype_file is not None: snp_indices_val = sp.array(chrom_d_val['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices_val = snp_indices_val[ok_indices['vg']] raw_snps_val, freqs_val = plinkfiles.parse_plink_snps( validation_genotype_file, snp_indices_val) snp_stds_val = sp.sqrt(2 * freqs_val * (1 - freqs_val)) snp_means_val = freqs_val * 2 # Check SNP frequencies, screen for possible problems.. 
if max_freq_discrep<1 and 'freqs' in ssg: ss_freqs = ss_freqs[ok_indices['ss']] ok_freq_snps = sp.logical_or(sp.absolute(ss_freqs - freqs) < max_freq_discrep,sp.absolute(ss_freqs + freqs-1) < max_freq_discrep) #Array of np.bool values ok_freq_snps = sp.logical_or(ok_freq_snps,ss_freqs<=0) #Only consider SNPs that actually have frequencies num_freq_discrep_filtered_snps = len(ok_freq_snps)- sp.sum(ok_freq_snps) assert num_freq_discrep_filtered_snps>=0, "Problems when filtering SNPs with frequency discrepencies" if num_freq_discrep_filtered_snps>0: # Filter freq_discrepancy_snps raw_snps = raw_snps[ok_freq_snps] snp_stds = snp_stds[ok_freq_snps] snp_means = snp_means[ok_freq_snps] freqs = freqs[ok_freq_snps] ps = ps[ok_freq_snps] ns = ns[ok_freq_snps] positions = positions[ok_freq_snps] nts = nts[ok_freq_snps] sids = sids[ok_freq_snps] betas = betas[ok_freq_snps] log_odds = log_odds[ok_freq_snps] if validation_genotype_file is not None: raw_snps_val = raw_snps_val[ok_freq_snps] snp_stds_val = snp_stds_val[ok_freq_snps] snp_means_val = snp_means_val[ok_freq_snps] freqs_val = freqs_val[ok_freq_snps] if debug: print('Filtered %d SNPs due to frequency discrepancies'%num_freq_discrep_filtered_snps) # Filter minor allele frequency SNPs. maf_filter = (freqs > min_maf) * (freqs < (1 - min_maf)) num_maf_filtered_snps = len(maf_filter)-sp.sum(maf_filter) assert num_maf_filtered_snps>=0, "Problems when filtering SNPs with low minor allele frequencies" if num_maf_filtered_snps>0: raw_snps = raw_snps[maf_filter] snp_stds = snp_stds[maf_filter] snp_means = snp_means[maf_filter] freqs = freqs[maf_filter] ps = ps[maf_filter] ns = ns[maf_filter] positions = positions[maf_filter] nts = nts[maf_filter] sids = sids[maf_filter] betas = betas[maf_filter] log_odds = log_odds[maf_filter] if validation_genotype_file is not None: raw_snps_val = raw_snps_val[maf_filter] snp_stds_val = snp_stds_val[maf_filter] snp_means_val = snp_means_val[maf_filter] freqs_val = freqs_val[maf_filter] if debug: print('Filtered %d SNPs due to low MAF'%num_maf_filtered_snps) genetic_map = [] if genetic_map_dir is not None: with gzip.open(genetic_map_dir + 'chr%d.interpolated_genetic_map.gz' % chrom) as f: for line in f: l = line.split() # if l[0] in sid_set: # genetic_map.append(l[0]) else: genetic_map = None coord_data_dict = {'chrom': 'chrom_%d' % chrom, 'raw_snps_ref': raw_snps, 'snp_stds_ref': snp_stds, 'snp_means_ref': snp_means, 'freqs_ref': freqs, 'ps': ps, 'ns': ns, 'positions': positions, 'nts': nts, 'sids': sids, 'genetic_map': genetic_map, 'betas': betas, 'log_odds': log_odds} if validation_genotype_file is not None: maf_adj_prs = sp.dot(log_odds, raw_snps_val) if debug and plinkf_dict_val['has_phenotype']: maf_adj_corr = sp.corrcoef(plinkf_dict_val['phenotypes'], maf_adj_prs)[0, 1] print('Log odds, per genotype PRS correlation w phenotypes for chromosome %d was %0.4f' % (chrom, maf_adj_corr)) coord_data_dict['raw_snps_val']=raw_snps_val coord_data_dict['snp_stds_val']=snp_stds_val coord_data_dict['snp_means_val']=snp_means_val coord_data_dict['freqs_val']=freqs_val coord_data_dict['log_odds_prs']=maf_adj_prs maf_adj_risk_scores += maf_adj_prs write_coord_data(cord_data_g, coord_data_dict, debug=debug) if debug: print('%d SNPs were retained on chromosome %d.' 
% (len(sids), chrom)) num_snps_common_before_filtering += len(common_sids) num_snps_common_after_filtering += len(sids) tot_num_ambig_nts += num_ambig_nts tot_num_non_supported_nts += num_non_supported_nts tot_num_non_matching_nts += num_non_matching_nts tot_num_freq_discrep_filtered_snps += num_freq_discrep_filtered_snps tot_num_maf_filtered_snps += num_maf_filtered_snps if not debug: sys.stdout.write('\r%0.2f%%\n' % (100.0)) sys.stdout.flush() # Now calculate the prediction r^2 if validation_genotype_file: if debug and plinkf_dict_val['has_phenotype']: maf_adj_corr = sp.corrcoef( plinkf_dict_val['phenotypes'], maf_adj_risk_scores)[0, 1] print('Log odds, per PRS correlation for the whole genome was %0.4f (r^2=%0.4f)' % (maf_adj_corr, maf_adj_corr ** 2)) print('Overall nucleotide concordance counts: rg_vg: %d, rg_ss: %d, vg_ss: %d' % (tot_g_vg_nt_concord_count, tot_g_ss_nt_concord_count, tot_vg_ss_nt_concord_count)) else: if debug: print('Overall nucleotide concordance counts, rg_ss: %d' % (tot_g_ss_nt_concord_count)) summary_dict[7]={'name':'Num chromosomes used:','value':len(chromosomes_found)} summary_dict[8]={'name':'SNPs common across datasets:','value':num_snps_common_before_filtering} summary_dict[9]={'name':'SNPs retained after filtering:','value':num_snps_common_after_filtering} if tot_num_ambig_nts>0: summary_dict[10]={'name':'SNPs w ambiguous nucleotides filtered:','value':tot_num_ambig_nts} if tot_num_non_supported_nts>0: summary_dict[10.1]={'name':'SNPs w unknown/unsupported nucleotides filtered:','value':tot_num_non_supported_nts} if tot_num_non_matching_nts>0: summary_dict[11]={'name':'SNPs w other nucleotide discrepancies filtered:','value':tot_num_non_matching_nts} if min_maf>0: summary_dict[12]={'name':'SNPs w MAF<%0.3f filtered:'%min_maf,'value':tot_num_maf_filtered_snps} if max_freq_discrep<0.5: summary_dict[13]={'name':'SNPs w allele freq discrepancy > %0.3f filtered:'%max_freq_discrep,'value':tot_num_freq_discrep_filtered_snps} t1 = time.time() t = (t1 - t0) summary_dict[13.9]={'name':'dash', 'value':'Running times'} summary_dict[15]={'name':'Run time for coordinating datasets:','value': '%d min and %0.2f sec'%(t / 60, t % 60)}
def add_genewise_score(icd_gene_clinical, rand_score, measure, nrand, alterations): all_mend_gn = mendelian_code.get_mendelian_genes(icd_gene_clinical) for alt in rand_score: ### add genewise scores genewise_score = rand_score[alt]['disease_score'].copy(deep=True) genewise_score.iloc[:,:] = 1 #disease_score = rand_score[alt]['disease_score'].copy(deep=True) #disease_score.iloc[:,:] = 1 gene_winner = [['']*rand_score[alt]['disease_score'].shape[1] for i in range(len(icd_gene_clinical))] gene_connect = [['']*rand_score[alt]['disease_score'].shape[1] for i in range(len(icd_gene_clinical))] set_connected = [['']*rand_score[alt]['disease_score'].shape[1] for i in range(len(icd_gene_clinical))] set_connection = [['']*rand_score[alt]['disease_score'].shape[1] for i in range(len(icd_gene_clinical))] for row, md in enumerate(rand_score[alt]['disease_score'].index): #mend_gn = icd_gene_clinical[md]['gene_omim'].keys() #if alt == 'del_peak' and md == 'Specified Anomalies of the Musculoskeletal System': # pdb.set_trace() mend_gn = [g for g in all_mend_gn if g in icd_gene_clinical[md]['gene_omim']] if len(mend_gn) == 0: continue gene_scores = rand_score[alt]['gene_score'].loc[mend_gn,:] #B background_set = set(background[alt].index) - set(mend_gn) #B background_probability = [0]*gene_scores.shape[1] #B gscore_vs_background = [0]*gene_scores.shape[1] gene_sp = ['']*gene_scores.shape[1] connect = ['']*gene_scores.shape[1] set_gene_con = ['']*gene_scores.shape[1] set_connect = ['']*gene_scores.shape[1] genewise_score.iloc[row,:] = gene_scores.min(axis=0) gsa = np.array(gene_scores) gsel = np.nonzero(scipy.logical_and(scipy.logical_or(gsa == scipy.tile(gsa.min(axis=0), (gsa.shape[0],1)), gsa <= .05*float(nrand)), gsa < nrand)) for (c_i, c) in enumerate(gene_scores.columns): #B background_genes = background_set - alterations[alt][c][1] #B p = (background[alt].loc[background_genes, c] < .05).sum()/float(len(background_genes)) #B background_probability[c_i] =p #B n_siggene = (gene_scores.loc[:,c] < .05*float(nrand)).sum(axis=0) #B gscore_vs_background[c_i] = 1 - stats.binom.cdf(n_siggene - 1, gene_scores.shape[0], p) #B if n_siggene == 0: #B gscore_vs_background[c_i] = 1.0 genes =[] conns = [] for i in gsel[0][gsel[1]==c_i]: g = gene_scores.index[i] genes += [g] conns += [measure[alt]['connection'][all_mend_gn.index(g)][c_i].strip(',')] gene_sp[c_i] = ','.join(genes) connect[c_i] = ';'.join(conns) genes = [] conns = [] #if (md =='"Pervasive, Specified Congenital Anomalies"' ) and (c == 'GBM'): pdb.set_trace() for g in gene_scores.index: c_conn = measure[alt]['connection'][all_mend_gn.index(g)][c_i].strip(",") if not c_conn == '': genes += [g] conns += [c_conn] set_gene_con[c_i] = ','.join(genes) set_connect[c_i] = ';'.join(conns) #B genewise_score.iloc[row,:] = gscore_vs_background gene_winner[row] = gene_sp gene_connect[row] = connect set_connected[row] = set_gene_con set_connection[row] = set_connect rand_score[alt]['genewise_disease_score'] = genewise_score*1/float(nrand) rand_score[alt]['genewise_disease_best'] = gene_winner rand_score[alt]['genewise_disease_connect'] = gene_connect rand_score[alt]['set_connected'] = set_connected rand_score[alt]['set_connection'] = set_connection
def scale_by_cal(Data, scale_t_ave=True, scale_f_ave=False, sub_med=False, scale_f_ave_mod=False, rotate=False) : """Puts all data in units of the cal temperature. Data is put into units of the cal temperature, thus removing dependence on the gain. This can be done by dividing by the time average of the cal (scale_t_ave=True, Default) thus removing dependence on the frequency- dependant gain. Alternatively, you can scale by the frequency average to remove the time-dependent gain (scale_f_ave=True). Data is then in units of the frequency averaged cal temperture. You can also do both (recommended). After some scaling the data ends up in units of the cal temperture as a funciton of frequency. Optionally you can also subtract the time average of the data off here (subtract_time_median), since you might be done with the cal information at this point. """ on_ind = 0 off_ind = 1 if (Data.field['CAL'][on_ind] != 'T' or Data.field['CAL'][off_ind] != 'F') : raise ce.DataError('Cal states not in expected order.') if tuple(Data.field['CRVAL4']) == (-5, -7, -8, -6) : # Here we check the polarizations and cal indicies xx_ind = 0 yy_ind = 3 xy_inds = [1,2] # A bunch of calculations used to test phase closure. Not acctually # relevant to what is being done here. #a = (Data.data[5, xy_inds, on_ind, 15:20] # - Data.data[5, xy_inds, off_ind, 15:20]) #a /= sp.sqrt( Data.data[5, xx_ind, on_ind, 15:20] # - Data.data[5, xx_ind, off_ind, 15:20]) #a /= sp.sqrt( Data.data[5, yy_ind, on_ind, 15:20] # - Data.data[5, yy_ind, off_ind, 15:20]) #print a[0,:]**2 + a[1,:]**2 diff_xx = Data.data[:,xx_ind,on_ind,:] - Data.data[:,xx_ind,off_ind,:] diff_yy = Data.data[:,yy_ind,on_ind,:] - Data.data[:,yy_ind,off_ind,:] if scale_t_ave : # Find the cal means (in time) and scale by them. # Means work much better than medians. Medians seems to bias the # result by up to 10%. This seems to be discretization noise. Cal # switches fast enough that we shouldn't need this anyway. cal_tmed_xx = ma.mean(diff_xx, 0) cal_tmed_yy = ma.mean(diff_yy, 0) cal_tmed_xx[sp.logical_or(cal_tmed_xx<=0, cal_tmed_yy<=0)] = ma.masked cal_tmed_yy[cal_tmed_xx.mask] = ma.masked Data.data[:,xx_ind,:,:] /= cal_tmed_xx Data.data[:,yy_ind,:,:] /= cal_tmed_yy Data.data[:,xy_inds,:,:] /= ma.sqrt(cal_tmed_yy*cal_tmed_xx) if scale_f_ave : # The frequency gains have have systematic structure to them, # they are not by any approximation gaussian distributed. Use # means, not medians across frequency. operation = ma.mean cal_fmea_xx = operation(diff_xx, -1) cal_fmea_yy = operation(diff_yy, -1) # Flag data with wierd cal power. Still Experimental. cal_fmea_xx[sp.logical_or(cal_fmea_xx<=0,cal_fmea_yy<=0)] = ma.masked cal_fmea_yy[cal_fmea_xx.mask] = ma.masked cal_xx = ma.mean(cal_fmea_xx) cal_yy = ma.mean(cal_fmea_yy) cal_fmea_xx[sp.logical_or(abs(cal_fmea_xx.anom()) >= 0.1*cal_xx, abs(cal_fmea_yy.anom()) >= 0.1*cal_yy)] = ma.masked cal_fmea_yy[cal_fmea_xx.mask] = ma.masked ntime = len(cal_fmea_xx) cal_fmea_xx.shape = (ntime, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1) Data.data[:,xx_ind,:,:] /= cal_fmea_xx Data.data[:,yy_ind,:,:] /= cal_fmea_yy cal_fmea_xx.shape = (ntime, 1, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1, 1) Data.data[:,xy_inds,:,:] /= ma.sqrt(cal_fmea_yy*cal_fmea_xx) if scale_f_ave_mod : # The frequency gains have have systematic structure to them, # they are not by any approximation gaussian distributed. Use # means, not medians across frequency. 
operation = ma.mean cal_fmea_xx = operation(diff_xx, -1) cal_fmea_yy = operation(diff_yy, -1) cal_fmea_xx_off = operation(Data.data[:,xx_ind,off_ind,:], -1) cal_fmea_yy_off = operation(Data.data[:,yy_ind,off_ind,:], -1) sys_xx = cal_fmea_xx_off/cal_fmea_xx sys_yy = cal_fmea_yy_off/cal_fmea_yy percent_ok = 0.03 sys_xx_tmed = ma.median(sys_xx) sys_yy_tmed = ma.median(sys_yy) maskbad_xx = (sys_xx > sys_xx_tmed + sys_xx_tmed*percent_ok)|(sys_xx < sys_xx_tmed - sys_xx_tmed*percent_ok) maskbad_yy = (sys_yy > sys_yy_tmed + sys_yy_tmed*percent_ok)|(sys_yy < sys_yy_tmed - sys_yy_tmed*percent_ok) cal_fmea_xx[sp.logical_or(cal_fmea_xx<=0,cal_fmea_yy<=0)] = ma.masked cal_fmea_yy[cal_fmea_xx.mask] = ma.masked cal_fmea_xx[maskbad_xx] = ma.masked cal_fmea_yy[maskbad_yy] = ma.masked cal_xx = ma.mean(cal_fmea_xx) cal_yy = ma.mean(cal_fmea_yy) ntime = len(cal_fmea_xx) cal_fmea_xx.shape = (ntime, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1) Data.data[:,xx_ind,:,:] /= cal_fmea_xx Data.data[:,yy_ind,:,:] /= cal_fmea_yy cal_fmea_xx.shape = (ntime, 1, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1, 1) Data.data[:,xy_inds,:,:] /= ma.sqrt(cal_fmea_yy*cal_fmea_xx) if scale_f_ave and scale_t_ave : # We have devided out t_cal twice so we need to put one factor back # in. cal_xx = operation(cal_tmed_xx) cal_yy = operation(cal_tmed_yy) Data.data[:,xx_ind,:,:] *= cal_xx Data.data[:,yy_ind,:,:] *= cal_yy Data.data[:,xy_inds,:,:] *= ma.sqrt(cal_yy*cal_xx) if scale_f_ave_mod and scale_t_ave : #Same divide out twice problem. cal_xx = operation(cal_tmed_xx) cal_yy = operation(cal_tmed_yy) Data.data[:,xx_ind,:,:] *= cal_xxcal_imag_mean Data.data[:,yy_ind,:,:] *= cal_yy Data.data[:,xy_inds,:,:] *= ma.sqrt(cal_yy*cal_xx) if scale_f_ave and scale_f_ave_mod : raise ce.DataError("time averaging twice") if rotate: # Define the differential cal phase to be zero and rotate all data # such that this is true. cal_real_mean = ma.mean(Data.data[:,1,0,:] - Data.data[:,1,1,:], 0) cal_imag_mean = ma.mean(Data.data[:,2,0,:] - Data.data[:,2,1,:], 0) # Get the cal phase angle as a function of frequency. cal_phase = -ma.arctan2(cal_imag_mean, cal_real_mean) # Rotate such that the cal phase is zero. Imperative to have a # temporary variable. New_data_real = (ma.cos(cal_phase) * Data.data[:,1,:,:] - ma.sin(cal_phase) * Data.data[:,2,:,:]) New_data_imag = (ma.sin(cal_phase) * Data.data[:,1,:,:] + ma.cos(cal_phase) * Data.data[:,2,:,:]) Data.data[:,1,:,:] = New_data_real Data.data[:,2,:,:] = New_data_imag elif tuple(Data.field['CRVAL4']) == (1, 2, 3, 4) : # For the shot term, just devide everything by on-off in I. I_ind = 0 cal_I_t = Data.data[:,I_ind,on_ind,:] - Data.data[:,I_ind,off_ind,:] cal_I = ma.mean(cal_I_t, 0) Data.data /= cal_I else : raise ce.DataError("Unsupported polarization states.") # Subtract the time median if desired. if sub_med : Data.data -= ma.median(Data.data, 0)
def get_LFP_RT(dates,blocks,key,sequential=False,task='multi',**kwargs): ''' Assumes code structure is 2,15, cotarg, lfptarg, 28, 5,6,7,9,30 This take RT as (28) - lfptarg sequential = true returns a 1d array with RT and targets in the order that they occurred ''' if key.LFP_only: rew_key = key.lfp_only_rew else: rew_key = key.rew if ('use_kin_strobed' in kwargs.keys() and kwargs['use_kin_strobed']): fname_ext='_kin' backup_ext = '' print 'using kin' else: fname_ext='' backup_ext = '_kin' if task=='phaseBMI': targ_keys = [64, 68] else: targ_keys = key.lfp_targ n = [] RT = dict() # for tg in targ_keys: # RT[tg]=np.empty((1,)) if 'anim' in kwargs.keys(): anim = kwargs['anim'] else: anim = 'seba' for d,day in enumerate(dates): for b,bl in enumerate(blocks[d][0]): fname = anim+day+bl+fname_ext+'.mat' fname_backup = anim+day+bl+backup_ext+'.mat' try: d = sio.loadmat(key.pre+fname,variable_names='Strobed') print 'loading kin' except: d = sio.loadmat(key.pre+fname_backup, variable_names='Strobed') print 'fname: ', fname strobed = d['Strobed'] if task=='multi': #Get reward trials tsk_ind = np.nonzero(strobed[:,1]==rew_key)[0] elif task=='cue_flash': #Get init trials tsk_ind = np.nonzero(strobed[:,1]==key.mc_go)[0] elif scipy.logical_or(task == 'mc', task=='MC'): tsk_ind = np.nonzero(strobed[:,1]==rew_key)[0] elif task=='phaseBMI': tsk_ind = np.nonzero(strobed[:,1]==rew_key)[0] init = np.empty((len(tsk_ind),)) targ = np.empty((len(tsk_ind),)) go_cue_time = np.empty((len(tsk_ind),)) n.extend([len(tsk_ind)]) for j,t_ind in enumerate(tsk_ind): if key.LFP_only: start = np.max([t_ind - 4, 0]) elif task=='multi': start = np.max([t_ind - 8, 0]) elif task=='cue_flash': start = np.max([t_ind - 5, 0]) elif task == 'mc': start = np.max([t_ind - 4, 0]) elif task == 'phaseBMI': start = np.max([t_ind - 7, 0]) tmp = strobed[start:t_ind,1] #Codes from trial try: init_ind = start+[i for i,t in enumerate(list(tmp)) if np.any(t==targ_keys)][-1] init[j] = int(init_ind) #Target index in strobed targ[j] = strobed[int(init[j]),1] #LFP target ID if key.LFP_only: go_cue_time[j] = strobed[int(t_ind)-2,0] elif task=='phaseBMI': go_cue_time[j] = strobed[int(t_ind)-3, 0] else: go_cue_time[j] = 0 #Generated in other functions except: print goose print 'help!' #Take diff between time entered into periphery (code 28) and init (codes 84-87): rt = strobed[init.astype(int)+1,0] - strobed[init.astype(int),0] if 'RT_sequ' in locals(): RT_sequ = np.hstack((RT_sequ,rt)) targ_sequ = np.hstack((targ_sequ,targ)) go_cue_sequ = np.hstack((go_cue_sequ, go_cue_time)) else: RT_sequ = rt targ_sequ = targ go_cue_sequ = go_cue_time for t,tg in enumerate(targ_keys): targ_ind = np.nonzero(targ==tg)[0] if tg in RT.keys(): RT[tg] = np.hstack((RT[tg],rt[targ_ind])) else: RT[tg] = rt[targ_ind] print 'done with ' + day +'!' if sequential and key.LFP_only: return RT_sequ, targ_sequ, n, go_cue_sequ elif sequential and task=='phaseBMI': return RT_sequ, targ_sequ, n, go_cue_sequ elif sequential: return RT_sequ, targ_sequ, n else: return RT, n
def RT_files(dates,blocks,key,fname,sequential=True,task='multi',key_save=None, **kwargs): sequ = dict() if 'only_basics' in kwargs.keys(): abs_time_go, n = get_MC_RTs(dates,blocks,key,sequential=True,task=task,**kwargs) trial_type = 0 if 'only_basics_plus_MC' in kwargs.keys(): print abs_time_go, n, MC_label = get_MC_RTs(dates,blocks,key,sequential=True,task=task,**kwargs) trial_type = 0 sequ['MC_label'] = MC_label else: if task=='LFP_only': #NOT IMPLEMENTED ELSEWHERE, fix LFP_RT, LFP_label, n, abs_time_go = get_LFP_RT(dates,blocks,key,sequential=True,task=task,**kwargs) sequ['LFP_RT'] = LFP_RT sequ['LFP_label'] = LFP_label elif scipy.logical_or(task=='MC',task=='mc'): MC1_RT, MC2_RT, MC_label, LFP_label, abs_time_go, n, trial_type = get_MC_RTs(dates,blocks,key, sequential=True,task=task,**kwargs) sequ['MC1_RT'] = MC1_RT sequ['MC2_RT'] = MC2_RT sequ['MC_label'] = MC_label elif task == 'targ_jump': MC1_RT, MC2_RT, MC_label1, MC_label2, abs_time_go, n, trial_type = get_MC_RTs(dates,blocks,key, sequential=True,task=task,**kwargs) #Here, MC1 = 5--> 6, MC2 = t2 -->7 #MC_label1 = MC targ 1 #MC_label2 = MC targ 2 sequ['MC1_RT'] = MC1_RT sequ['MC2_RT'] = MC2_RT sequ['MC_label1'] = MC_label1 sequ['MC_label2'] = MC_label2 elif task == 'phaseBMI': phase_RT, phase_label, n, abs_time_go = get_LFP_RT(dates,blocks,key,sequential=True, task=task,**kwargs) sequ['phase_RT'] = phase_RT sequ['phase_label'] = phase_label trial_type = np.ones((np.sum(n),))+8 else: LFP_RT, LFP_label, n = get_LFP_RT(dates,blocks,key,sequential=True,task=task,**kwargs) MC1_RT, MC2_RT, MC_label, LFP_label, abs_time_go, n, trial_type = get_MC_RTs(dates,blocks,key, sequential=True,task=task,**kwargs) sequ['MC1_RT'] = MC1_RT sequ['MC2_RT'] = MC2_RT sequ['MC_label'] = MC_label sequ['LFP_RT'] = LFP_RT sequ['LFP_label'] = LFP_label sequ['abs_time_go'] = abs_time_go sequ['n'] = n sequ['trial_type'] = trial_type if fname is not False: if key_save is None: sio.savemat(key.pre+fname,sequ) else: sio.savemat(key_save.pre+fname,sequ) else: return sequ
nc = Dataset(os.path.join(path,filename),mode='r') time = num2date(nc.variables['time'][:],nc.variables['time'].units) G = nc.groups OBS = G['OBS_d22'] os.system('mkdir -p '+ppath+varname) # Specify which WM sensor to use for validation try: sensor = bestWMsensor[station] except KeyError: sensor = 0 obs = ma.array(OBS.variables[varname][sensor]) obs.data[obs.mask==True] = sp.nan # make sure all masked values are nan obs.mask = sp.logical_or(obs.mask, sp.isnan(obs.data)) units = OBS.variables[varname].units if (all(sp.isnan(obs.data)) or all(obs.mask==True)): print 'no data for '+station+' during '+timestr continue # select variable from each model: modeldata = select_var_from_models(G,varname) # # append to list for all stations: obs_all.append(obs) for gname, var in modeldata.iteritems(): if gname in mod_all.keys():
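# Hedged sketch of the mask-union step above: make sure every NaN in a masked
# observation array is also masked, by OR-ing the existing mask with isnan().
# Uses plain numpy; the observation values here are synthetic.
import numpy as np
import numpy.ma as ma

obs = ma.array([1.0, np.nan, 3.0, 4.0], mask=[False, False, True, False])
obs.mask = np.logical_or(obs.mask, np.isnan(obs.data))
print(obs)          # [1.0 -- -- 4.0]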
def get_psi_map(e, plot=True, t_val=None): ''' Get Psi map only in confined region. The first argument is an 'eqtools' tree, the second the time [s] to get psi map at. ''' if plot and t_val is None: print('Plotting at t=1.2 s') t_val = 1.2 times = e.getTimeBase() # get major radius on axis R0 = e.getMagR() # flux on R,Z grids psiRZ = e.getFluxGrid() rGrid = e.getRGrid(length_unit='m') zGrid = e.getZGrid(length_unit='m') # coordinates along LCFS RLCFS = e.getRLCFS(length_unit='m') ZLCFS = e.getZLCFS(length_unit='m') # find center (largest flux) R0 = e.getMagR() Z0 = e.getMagZ() psiRZ_masked = np.zeros_like(psiRZ) # interpolate psiRZ on denser grid Rdense = np.linspace(np.min(rGrid), np.max(rGrid), 200) Zdense = np.linspace(np.min(zGrid), np.max(zGrid), 250) RR, ZZ = np.meshgrid(Rdense, Zdense) psinormRZ_dense = e.rz2phinorm( RR, ZZ, times, sqrt=True) #normalized sqrt of poloidal flux coordinate # conditions used to identify the confined plasma region at every time step cond1 = np.stack([ZZ<np.min(ZLCFS[t_idxx,\ scipy.where(scipy.logical_or(RLCFS[t_idxx,:]>0.0,scipy.isnan(RLCFS[t_idxx,:])))[0]])\ for t_idxx in range(len(times))]) cond2 = np.stack([ZZ>np.max(ZLCFS[t_idxx,\ scipy.where(scipy.logical_or(RLCFS[t_idxx,:] > 0.0,scipy.isnan(RLCFS[t_idxx,:])))[0]])\ for t_idxx in range(len(times))]) mask = np.logical_or( np.logical_or(np.logical_or(cond1, cond2), psinormRZ_dense > 1), np.isnan(psinormRZ_dense)) psinormRZ_masked = np.ma.masked_array(psinormRZ_dense, mask=mask, fill_value=0.0) if plot: # plot at a specific time, given by the user t_idx = np.argmin(np.abs(times - t_val)) maskarr = scipy.where( scipy.logical_or(RLCFS[t_idx] > 0.0, scipy.isnan(RLCFS[t_idx]))) RLCFSframe = RLCFS[t_idx, maskarr[0]] ZLCFSframe = ZLCFS[t_idx, maskarr[0]] fluxPlot = plt.figure(figsize=(6, 11)) gs = mplgs.GridSpec(2, 1, height_ratios=[30, 1]) psi = fluxPlot.add_subplot(gs[0, 0]) xlim = psi.get_xlim() ylim = psi.get_ylim() # dummy plot to get x,ylims psi.contour(rGrid, zGrid, psiRZ[0], 1) # add LCFS contour psi.plot(RLCFSframe, ZLCFSframe, 'r', linewidth=2.0, zorder=3) # plot masked flux surfaces psi.contourf(Rdense, Zdense, psinormRZ_masked[t_idx, :, :]) # plot separately the masked flux surfaces plt.figure() plt.imshow(psinormRZ_masked[t_idx, :, :]) plt.colorbar() return psinormRZ_masked
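# Minimal sketch of how the confined-region mask in get_psi_map is assembled:
# several boolean conditions (outside the LCFS in Z, normalized flux > 1, NaN)
# are OR-ed together and anything True is masked out. Grids and limits below
# are invented stand-ins, not equilibrium data.
import numpy as np

RR, ZZ = np.meshgrid(np.linspace(0.4, 0.9, 5), np.linspace(-0.5, 0.5, 5))
psin = np.hypot(RR - 0.65, ZZ) / 0.2          # stand-in for psinormRZ_dense
z_lo, z_hi = -0.3, 0.3                        # stand-ins for the LCFS Z extrema
mask = np.logical_or.reduce([ZZ < z_lo, ZZ > z_hi, psin > 1, np.isnan(psin)])
psin_masked = np.ma.masked_array(psin, mask=mask, fill_value=0.0)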
f=nc.Dataset(exp.ctl.basin_mask) mm={'tmask':2,'AMOC_MASK':1} for key in mm: try: x=f.variables[key]; x.set_auto_maskandscale(True) bmask=sp.ma.masked_not_equal(x[:],mm[key]).mask except KeyError: pass ty=exp.ctl.subset(iind=0) gm=exp.ctl.subset(iind=0) for tt,xx in enumerate(exp.ctl.time): var=exp.ctl.fromfile('ty_trans', tind=tt) mask=sp.logical_or(var.data.mask,bmask[sp.newaxis,sp.newaxis]) var.data.mask=mask ty.data[tt,:,:,0]=var.data.sum(3) try: var=exp.ctl.fromfile('ty_trans_gm',tind=tt) except KeyError: print 'No ty_trans_gm is found.' var.data.mask=mask gm.data[tt,:,:,0]=var.data.sum(3) km=ty.grid['lev'].size for l in reversed(xrange(km-1)): ty.data[:,l,:,:]=ty.data[:,l:l+2,:,:].sum(1) ty.data+=gm.data; ty.data*=-1
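# Sketch of the mask-broadcasting trick above: a 2-D basin mask is OR-ed onto
# the mask of a 4-D (time, lev, lat, lon) masked array by inserting singleton
# axes, so the basin is masked at every time and level. Shapes are toy sizes.
import numpy as np
import numpy.ma as ma

bmask = np.array([[True, False], [False, True]])                  # (lat, lon)
var = ma.array(np.random.rand(3, 4, 2, 2),
               mask=np.zeros((3, 4, 2, 2), dtype=bool))           # (time, lev, lat, lon)
var.mask = np.logical_or(var.mask, bmask[np.newaxis, np.newaxis])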
def coordinate_datasets(reference_genotype_file, hdf5_file, summary_dict, validation_genotype_file=None, genetic_map_dir=None, min_maf=0.01, skip_coordination=False, max_freq_discrep = 0.15, debug=False): summary_dict[3.9]={'name':'dash', 'value':'Coordination'} t0 = time.time() if validation_genotype_file is not None: print('Coordinating datasets (Summary statistics, LD reference genotypes, and Validation genotypes).') else: print('Coordinating datasets (Summary statistics and LD reference genotypes).') plinkf = plinkfile.PlinkFile(reference_genotype_file) # Figure out chromosomes and positions. if debug: print('Parsing plinkf_dict_val reference genotypes') loci = plinkf.get_loci() plinkf.close() summary_dict[4]={'name':'Num individuals in LD Reference data:','value':plinkfiles.get_num_indivs(reference_genotype_file)} summary_dict[4.1]={'name':'SNPs in LD Reference data:','value':len(loci)} gf_chromosomes = [l.chromosome for l in loci] chromosomes = sp.unique(gf_chromosomes) chromosomes.sort() chr_dict = plinkfiles.get_chrom_dict(loci, chromosomes) if validation_genotype_file is not None: if debug: print('Parsing LD validation bim file') plinkf_val = plinkfile.PlinkFile(validation_genotype_file) # Loads only the individuals... plinkf_dict_val = plinkfiles.get_phenotypes(plinkf_val) loci_val = plinkf_val.get_loci() plinkf_val.close() summary_dict[5]={'name':'SNPs in Validation data:','value':len(loci_val)} chr_dict_val = plinkfiles.get_chrom_dict(loci_val, chromosomes) # Open HDF5 file and prepare out data assert not 'iids' in hdf5_file, 'Something is wrong with the HDF5 file, no individuals IDs were found.' if plinkf_dict_val['has_phenotype']: hdf5_file.create_dataset('y', data=plinkf_dict_val['phenotypes']) summary_dict[6]={'name':'Num validation phenotypes:','value':plinkf_dict_val['num_individs']} hdf5_file.create_dataset('fids', data=sp.array(plinkf_dict_val['fids'], dtype=util.fids_dtype)) hdf5_file.create_dataset('iids', data=sp.array(plinkf_dict_val['iids'], dtype=util.iids_dtype)) maf_adj_risk_scores = sp.zeros(plinkf_dict_val['num_individs']) # Now summary statistics ssf = hdf5_file['sum_stats'] cord_data_g = hdf5_file.create_group('cord_data') num_common_snps = 0 # corr_list = [] chromosomes_found = set() num_snps_common_before_filtering =0 num_snps_common_after_filtering =0 tot_num_non_matching_nts = 0 tot_num_non_supported_nts = 0 tot_num_ambig_nts = 0 tot_num_freq_discrep_filtered_snps = 0 tot_num_maf_filtered_snps = 0 tot_g_ss_nt_concord_count = 0 if validation_genotype_file is not None: tot_g_vg_nt_concord_count = 0 tot_vg_ss_nt_concord_count = 0 # Now iterate over chromosomes chrom_i = 0 for chrom in chromosomes: chrom_i +=1 if not debug: sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%' % (100.0 * (float(chrom_i) / (len(chromosomes)+1)))) sys.stdout.flush() try: chr_str = 'chrom_%d' % chrom ssg = ssf[chr_str] except Exception as err_str: print(err_str) print('Did not find chromosome %d in SS dataset.'%chrom) print('Continuing.') continue if debug: print('Coordinating data for chromosome %s' % chr_str) chromosomes_found.add(chrom) #Get summary statistics chromosome group ssg = ssf['chrom_%d' % chrom] ss_sids = (ssg['sids'][...]).astype(util.sids_u_dtype) if validation_genotype_file is not None: chrom_d_val = chr_dict_val[chr_str] vg_sids = chrom_d_val['sids'] common_sids = sp.intersect1d(ss_sids, vg_sids) # A map from sid to index for validation data vg_sid_dict = {} for i, sid in enumerate(vg_sids): vg_sid_dict[sid] = i else: common_sids = ss_sids # A map from sid to index for 
summary stats ss_sid_dict = {} for i, sid in enumerate(ss_sids): ss_sid_dict[sid] = i #The indices to retain for the LD reference genotypes chrom_d = chr_dict[chr_str] g_sids = chrom_d['sids'] common_sids = sp.intersect1d(common_sids, g_sids) # A map from sid to index for LD reference data g_sid_dict = {} for i, sid in enumerate(g_sids): g_sid_dict[sid] = i if debug: print('Found %d SNPs on chrom %d that were common across all datasets' % (len(common_sids), chrom)) print('Ordering SNPs by genomic positions (based on LD reference genotypes).') g_snp_map = [] for sid in common_sids: g_snp_map.append(g_sid_dict[sid]) # order by positions (based on LD reference file) g_positions = sp.array(chrom_d['positions'])[g_snp_map] order = sp.argsort(g_positions) g_snp_map = sp.array(g_snp_map)[order] g_snp_map = g_snp_map.tolist() common_sids = sp.array(common_sids)[order] # Get the ordered sum stats SNPs indices. ss_snp_map = [] for sid in common_sids: ss_snp_map.append(ss_sid_dict[sid]) # Get the ordered validation SNPs indices if validation_genotype_file is not None: vg_snp_map = [] for sid in common_sids: vg_snp_map.append(vg_sid_dict[sid]) vg_nts = sp.array(chrom_d_val['nts']) vg_nts_ok = sp.array(vg_nts)[vg_snp_map] g_nts = sp.array(chrom_d['nts']) ss_nts = (ssg['nts'][...]).astype(util.nts_u_dtype) betas = ssg['betas'][...] log_odds = ssg['log_odds'][...] if 'freqs' in ssg: ss_freqs = ssg['freqs'][...] g_ss_nt_concord_count = sp.sum( g_nts[g_snp_map] == ss_nts[ss_snp_map]) / 2.0 if validation_genotype_file is not None: vg_ss_nt_concord_count = sp.sum(vg_nts_ok == ss_nts[ss_snp_map]) / 2.0 g_vg_nt_concord_count = sp.sum(g_nts[g_snp_map] == vg_nts_ok) / 2.0 if debug: print('Nucleotide concordance counts out of %d genotypes, vg-rg: %d ; vg-ss: %d' % (len(g_snp_map), g_vg_nt_concord_count, vg_ss_nt_concord_count)) tot_vg_ss_nt_concord_count += vg_ss_nt_concord_count tot_g_vg_nt_concord_count += g_vg_nt_concord_count tot_g_ss_nt_concord_count += g_ss_nt_concord_count if debug: print('Nucleotide concordance counts out of %d genotypes, rg-ss: %d' % (len(g_snp_map), g_ss_nt_concord_count)) num_freq_discrep_filtered_snps = 0 num_non_matching_nts = 0 num_non_supported_nts = 0 num_ambig_nts = 0 # Identifying which SNPs have nucleotides that are ok.. ok_nts = [] ok_indices = {'g': [], 'ss': []} if validation_genotype_file is not None: ok_indices['vg']=[] #Now loop over SNPs to coordinate nucleotides. if validation_genotype_file is not None: for g_i, vg_i, ss_i in zip(g_snp_map, vg_snp_map, ss_snp_map): # To make sure, is the SNP id the same? assert g_sids[g_i] == vg_sids[vg_i] == ss_sids[ss_i], 'Some issues with coordinating the genotypes.' g_nt = g_nts[g_i] if not skip_coordination: vg_nt = vg_nts[vg_i] ss_nt = ss_nts[ss_i] # Is the nucleotide ambiguous. g_nt = [g_nts[g_i][0], g_nts[g_i][1]] if tuple(g_nt) in util.ambig_nts: num_ambig_nts += 1 continue # First check if nucleotide is sane? if (not g_nt[0] in util.valid_nts) or (not g_nt[1] in util.valid_nts): num_non_supported_nts += 1 continue os_g_nt = sp.array( [util.opp_strand_dict[g_nt[0]], util.opp_strand_dict[g_nt[1]]]) flip_nts = False #Coordination is a bit more complicate when validation genotypes are provided.. 
if not ((sp.all(g_nt == ss_nt) or sp.all(os_g_nt == ss_nt)) and (sp.all(g_nt == vg_nt) or sp.all(os_g_nt == vg_nt))): if sp.all(g_nt == vg_nt) or sp.all(os_g_nt == vg_nt): flip_nts = (g_nt[1] == ss_nt[0] and g_nt[0] == ss_nt[1]) or ( os_g_nt[1] == ss_nt[0] and os_g_nt[0] == ss_nt[1]) # Try flipping the SS nt if flip_nts: betas[ss_i] = -betas[ss_i] log_odds[ss_i] = -log_odds[ss_i] if 'freqs' in ssg: ss_freqs[ss_i] = 1 - ss_freqs[ss_i] else: if debug: print("Nucleotides don't match after all?: g_sid=%s, ss_sid=%s, g_i=%d, ss_i=%d, g_nt=%s, ss_nt=%s" % \ (g_sids[g_i], ss_sids[ss_i], g_i, ss_i, str(g_nt), str(ss_nt))) num_non_matching_nts += 1 continue else: num_non_matching_nts += 1 continue # Opposite strand nucleotides # everything seems ok. ok_indices['g'].append(g_i) ok_indices['vg'].append(vg_i) ok_indices['ss'].append(ss_i) ok_nts.append(g_nt) else: for g_i, ss_i in zip(g_snp_map, ss_snp_map): # To make sure, is the SNP id the same? assert g_sids[g_i] == ss_sids[ss_i], 'Some issues with coordinating the genotypes.' g_nt = g_nts[g_i] if not skip_coordination: ss_nt = ss_nts[ss_i] # Is the nucleotide ambiguous. g_nt = [g_nts[g_i][0], g_nts[g_i][1]] if tuple(g_nt) in util.ambig_nts: num_ambig_nts += 1 continue # First check if nucleotide is sane? if (not g_nt[0] in util.valid_nts) or (not g_nt[1] in util.valid_nts): num_non_matching_nts += 1 continue os_g_nt = sp.array( [util.opp_strand_dict[g_nt[0]], util.opp_strand_dict[g_nt[1]]]) flip_nts = False #Coordination is a bit more complicate when validation genotypes are provided.. if not sp.all(g_nt == ss_nt) or sp.all(os_g_nt == ss_nt): flip_nts = (g_nt[1] == ss_nt[0] and g_nt[0] == ss_nt[1]) or ( os_g_nt[1] == ss_nt[0] and os_g_nt[0] == ss_nt[1]) # Try flipping the SS nt if flip_nts: betas[ss_i] = -betas[ss_i] log_odds[ss_i] = -log_odds[ss_i] if 'freqs' in ssg and ss_freqs[ss_i]>0: ss_freqs[ss_i] = 1.0 - ss_freqs[ss_i] else: if debug: print("Nucleotides don't match after all?: g_sid=%s, ss_sid=%s, g_i=%d, ss_i=%d, g_nt=%s, ss_nt=%s" % \ (g_sids[g_i], ss_sids[ss_i], g_i, ss_i, str(g_nt), str(ss_nt))) num_non_matching_nts += 1 continue # everything seems ok. ok_indices['g'].append(g_i) ok_indices['ss'].append(ss_i) ok_nts.append(g_nt) if debug: print('%d SNPs had ambiguous nucleotides.' % num_ambig_nts) print('%d SNPs were excluded due to nucleotide issues.' % num_non_matching_nts) # Resorting by position positions = sp.array(chrom_d['positions'])[ok_indices['g']] # Now parse SNPs .. snp_indices = sp.array(chrom_d['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices = snp_indices[ok_indices['g']] raw_snps, freqs = plinkfiles.parse_plink_snps( reference_genotype_file, snp_indices) snp_stds = sp.sqrt(2 * freqs * (1 - freqs)) snp_means = freqs * 2 betas = betas[ok_indices['ss']] log_odds = log_odds[ok_indices['ss']] ps = ssg['ps'][...][ok_indices['ss']] nts = sp.array(ok_nts) sids = (ssg['sids'][...]).astype(util.sids_u_dtype) sids = sids[ok_indices['ss']] #Parse validation genotypes, if available if validation_genotype_file is not None: snp_indices_val = sp.array(chrom_d_val['snp_indices']) # Pinpoint where the SNPs are in the file. snp_indices_val = snp_indices_val[ok_indices['vg']] raw_snps_val, freqs_val = plinkfiles.parse_plink_snps( validation_genotype_file, snp_indices_val) snp_stds_val = sp.sqrt(2 * freqs_val * (1 - freqs_val)) snp_means_val = freqs_val * 2 # Check SNP frequencies, screen for possible problems.. 
if max_freq_discrep<1 and 'freqs' in ssg: ss_freqs = ss_freqs[ok_indices['ss']] ok_freq_snps = sp.logical_or(sp.absolute(ss_freqs - freqs) < max_freq_discrep,sp.absolute(ss_freqs + freqs-1) < max_freq_discrep) #Array of np.bool values ok_freq_snps = sp.logical_or(ok_freq_snps,ss_freqs<=0) #Only consider SNPs that actually have frequencies num_freq_discrep_filtered_snps = len(ok_freq_snps)- sp.sum(ok_freq_snps) assert num_freq_discrep_filtered_snps>=0, "Problems when filtering SNPs with frequency discrepencies" if num_freq_discrep_filtered_snps>0: # Filter freq_discrepancy_snps raw_snps = raw_snps[ok_freq_snps] snp_stds = snp_stds[ok_freq_snps] snp_means = snp_means[ok_freq_snps] freqs = freqs[ok_freq_snps] ps = ps[ok_freq_snps] positions = positions[ok_freq_snps] nts = nts[ok_freq_snps] sids = sids[ok_freq_snps] betas = betas[ok_freq_snps] log_odds = log_odds[ok_freq_snps] if validation_genotype_file is not None: raw_snps_val = raw_snps_val[ok_freq_snps] snp_stds_val = snp_stds_val[ok_freq_snps] snp_means_val = snp_means_val[ok_freq_snps] freqs_val = freqs_val[ok_freq_snps] if debug: print('Filtered %d SNPs due to frequency discrepancies'%num_freq_discrep_filtered_snps) # Filter minor allele frequency SNPs. maf_filter = (freqs > min_maf) * (freqs < (1 - min_maf)) num_maf_filtered_snps = len(maf_filter)-sp.sum(maf_filter) assert num_maf_filtered_snps>=0, "Problems when filtering SNPs with low minor allele frequencies" if num_maf_filtered_snps>0: raw_snps = raw_snps[maf_filter] snp_stds = snp_stds[maf_filter] snp_means = snp_means[maf_filter] freqs = freqs[maf_filter] ps = ps[maf_filter] positions = positions[maf_filter] nts = nts[maf_filter] sids = sids[maf_filter] betas = betas[maf_filter] log_odds = log_odds[maf_filter] if validation_genotype_file is not None: raw_snps_val = raw_snps_val[maf_filter] snp_stds_val = snp_stds_val[maf_filter] snp_means_val = snp_means_val[maf_filter] freqs_val = freqs_val[maf_filter] if debug: print('Filtered %d SNPs due to low MAF'%num_maf_filtered_snps) genetic_map = [] if genetic_map_dir is not None: with gzip.open(genetic_map_dir + 'chr%d.interpolated_genetic_map.gz' % chrom) as f: for line in f: l = line.split() # if l[0] in sid_set: # genetic_map.append(l[0]) else: genetic_map = None coord_data_dict = {'chrom': 'chrom_%d' % chrom, 'raw_snps_ref': raw_snps, 'snp_stds_ref': snp_stds, 'snp_means_ref': snp_means, 'freqs_ref': freqs, 'ps': ps, 'positions': positions, 'nts': nts, 'sids': sids, 'genetic_map': genetic_map, 'betas': betas, 'log_odds': log_odds} if validation_genotype_file is not None: maf_adj_prs = sp.dot(log_odds, raw_snps_val) if debug and plinkf_dict_val['has_phenotype']: maf_adj_corr = sp.corrcoef(plinkf_dict_val['phenotypes'], maf_adj_prs)[0, 1] print('Log odds, per genotype PRS correlation w phenotypes for chromosome %d was %0.4f' % (chrom, maf_adj_corr)) coord_data_dict['raw_snps_val']=raw_snps_val coord_data_dict['snp_stds_val']=snp_stds_val coord_data_dict['snp_means_val']=snp_means_val coord_data_dict['freqs_val']=freqs_val coord_data_dict['log_odds_prs']=maf_adj_prs maf_adj_risk_scores += maf_adj_prs write_coord_data(cord_data_g, coord_data_dict, debug=debug) if debug: print('%d SNPs were retained on chromosome %d.' 
% (len(sids), chrom)) num_snps_common_before_filtering += len(common_sids) num_snps_common_after_filtering += len(sids) tot_num_ambig_nts += num_ambig_nts tot_num_non_supported_nts += num_non_supported_nts tot_num_non_matching_nts += num_non_matching_nts tot_num_freq_discrep_filtered_snps += num_freq_discrep_filtered_snps tot_num_maf_filtered_snps += num_maf_filtered_snps if not debug: sys.stdout.write('\b\b\b\b\b\b\b%0.2f%%\n' % (100.0)) sys.stdout.flush() # Now calculate the prediction r^2 if validation_genotype_file: if debug and plinkf_dict_val['has_phenotype']: maf_adj_corr = sp.corrcoef( plinkf_dict_val['phenotypes'], maf_adj_risk_scores)[0, 1] print('Log odds, per PRS correlation for the whole genome was %0.4f (r^2=%0.4f)' % (maf_adj_corr, maf_adj_corr ** 2)) print('Overall nucleotide concordance counts: rg_vg: %d, rg_ss: %d, vg_ss: %d' % (tot_g_vg_nt_concord_count, tot_g_ss_nt_concord_count, tot_vg_ss_nt_concord_count)) else: if debug: print('Overall nucleotide concordance counts, rg_ss: %d' % (tot_g_ss_nt_concord_count)) summary_dict[7]={'name':'Num chromosomes used:','value':len(chromosomes_found)} summary_dict[8]={'name':'SNPs common across datasets:','value':num_snps_common_before_filtering} summary_dict[9]={'name':'SNPs retained after filtering:','value':num_snps_common_after_filtering} if tot_num_ambig_nts>0: summary_dict[10]={'name':'SNPs w ambiguous nucleotides filtered:','value':tot_num_ambig_nts} if tot_num_non_supported_nts>0: summary_dict[10.1]={'name':'SNPs w unknown/unsupported nucleotides filtered:','value':tot_num_non_supported_nts} if tot_num_non_matching_nts>0: summary_dict[11]={'name':'SNPs w other nucleotide discrepancies filtered:','value':tot_num_non_matching_nts} if min_maf>0: summary_dict[12]={'name':'SNPs w MAF<%0.3f filtered:'%min_maf,'value':tot_num_maf_filtered_snps} if max_freq_discrep<0.5: summary_dict[13]={'name':'SNPs w allele freq discrepancy > %0.3f filtered:'%max_freq_discrep,'value':tot_num_freq_discrep_filtered_snps} t1 = time.time() t = (t1 - t0) summary_dict[13.9]={'name':'dash', 'value':'Running times'} summary_dict[15]={'name':'Run time for coordinating datasets:','value': '%d min and %0.2f sec'%(t / 60, t % 60)}
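# Small sketch of the allele-frequency sanity check used in coordinate_datasets:
# a SNP passes if the summary-statistic frequency matches the reference frequency
# either directly or after flipping alleles (1 - freq). Numbers below are made up.
import numpy as np

ss_freqs = np.array([0.20, 0.81, 0.55, -1.0])   # -1 marks a missing frequency
freqs    = np.array([0.22, 0.18, 0.90, 0.40])
max_freq_discrep = 0.15
ok = np.logical_or(np.abs(ss_freqs - freqs) < max_freq_discrep,
                   np.abs(ss_freqs + freqs - 1) < max_freq_discrep)
ok = np.logical_or(ok, ss_freqs <= 0)           # keep SNPs with no reported frequency
print(ok)                                       # [ True  True False  True]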
### READ CodesDf = bhv.parse_code_map(code_map_path) code_map = dict(zip(CodesDf['code'],CodesDf['name'])) Data = bhv.parse_arduino_log(log_path, code_map) ### COMMON # the names of the things present in the log span_names = [name.split('_ON')[0] for name in CodesDf['name'] if name.endswith('_ON')] event_names = [name.split('_EVENT')[0] for name in CodesDf['name'] if name.endswith('_EVENT')] Spans = bhv.log2Spans(Data, span_names) Events = bhv.log2Events(Data, event_names) ### SOME PREPROCESSING # filter unrealistic licks bad_licks = sp.logical_or(Spans['LICK']['dt'] < 20,Spans['LICK']['dt'] > 100) Spans['LICK'] = Spans['LICK'].loc[~bad_licks] # add lick_event Lick_Event = pd.DataFrame(sp.stack([['NA']*Spans['LICK'].shape[0],Spans['LICK']['t_on'].values,['LICK_EVENT']*Spans['LICK'].shape[0]]).T,columns=['code','t','name']) Lick_Event['t'] = Lick_Event['t'].astype('float') Data = Data.append(Lick_Event) Data.sort_values('t') event_names.append("LICK") Events['LICK'] = bhv.log2Event(Data,'LICK') Spans.pop("LICK") span_names.remove("LICK") colors = sns.color_palette('hls',n_colors=len(event_names)+len(span_names))[::-1]
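# Hedged sketch of the lick-filtering step above: lick spans with implausible
# durations are flagged with logical_or and dropped. The DataFrame is synthetic;
# only the 'dt' / 't_on' column names mirror the code above.
import numpy as np
import pandas as pd

licks = pd.DataFrame({'t_on': [10.0, 55.0, 120.0, 300.0],
                      'dt':   [5.0, 35.0, 250.0, 60.0]})
bad = np.logical_or(licks['dt'] < 20, licks['dt'] > 100)
licks = licks.loc[~bad]          # keeps only the 35 ms and 60 ms licks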
def make_rdnoise(self): if self.verbose: print('Loading {}'.format(infile)) #Read in input fits file using astropy with fits.open(self.infile) as h: self.data = h[1].data self.hdr = h[1].header self.hdr0 = h[0].header #Keep only the first integration if len(self.data.shape) == 4: print('WARNING: Input data cube has 4 dimensions.') print('Extracting the first integration only and continuing.') self.data = self.data[0, :, :, :] elif len(self.data.shape) < 3: print 'ERROR: data cube has less than 3 dimensions!!! Quitting!' sys.exit(0) #Make sure the data are full frame Ngroups, ny, nx = self.data.shape if (ny, nx) != (2048, 2048): print("WARNING: input data from {} appear not to be full frame!". format(self.infile)) print("x,y dimensions are {}x{}".format(nx, ny)) sys.exit() if self.verbose: print('Number of groups: {}'.format(Ngroups)) #Set step sizes stepsizex = self.stepsizex stepsizey = self.stepsizey if stepsizex is None: stepsizex = self.boxsizex if stepsizey is None: stepsizey = self.boxsizey halfstepsizex = int(0.5 * stepsizex) halfstepsizey = int(0.5 * stepsizey) # get the xy limits. if (self.xmin is None): xmin = 0 else: xmin = self.xmin if (self.xmax is None): xmax = self.data.shape[2] else: xmax = self.xmax if (self.ymin is None): ymin = 0 else: ymin = self.ymin if (self.ymax is None): ymax = self.data.shape[1] else: ymax = self.ymax if self.verbose: print('xmin, xmax, ymin, ymax: {}, {}, {}, {}'.format( xmin, xmax, ymin, ymax)) sigmacut = calcaverageclass() #Create a series of CDS frames diffim = self.data[1:Ngroups, :, :] - self.data[0:Ngroups - 1, :, :] # define output matrices self.im_rdnoise = scipy.zeros((self.data.shape[1], self.data.shape[2]), dtype=float) self.im_rdnoise_err = scipy.zeros(self.im_rdnoise.shape, dtype=float) im_Nused = scipy.zeros(self.im_rdnoise.shape, dtype=int) im_Pskipped = scipy.zeros(self.im_rdnoise.shape, dtype=float) # load mask if self.bpm is not None: print('Loading mask file {}'.format(self.bpm)) with fits.open(self.bpm) as bpmhdu: self.bpmdata = bpmhdu[1].data if self.bpmdata.shape != (2048, 2048): print( "WARNING! Bad pixel mask shape is incorrect or was improperly read!" ) sys.exit() mask = scipy.where( scipy.logical_and(self.bpmdata, self.bpmval) > 0, True, False) # mask the overscan (THIS ASSUMES WE ARE WORKING ON FULL FRAME DATA) mask[0:4, :] = 1 mask[2044:2048, :] = 1 mask[:, 0:4] = 1 mask[:, 2044:2048] = 1 else: mask = scipy.zeros((self.data.shape[1], self.data.shape[2]), dtype=np.uint16) #load epoxy mask if needed detector = self.hdr0['DETECTOR'] if detector in [ 'NRCA1', 'NRCA3', 'NRCA4', 'NRCALONG', 'NRCB1', 'NRCB4' ]: epoxymask = self.load_epoxy_mask(detector) #invert the epoxy mask if requested if self.invert_epoxy: print( "Inverting the epoxy mask to work within the epoxy void region." 
) epoxymask = epoxymask - 1 epoxymask[epoxymask == -1] = 1 #add the epoxy mask to the bpm mask = scipy.logical_or(mask, epoxymask) #create an inverted epoxy mask for later when filling in readnoise values #inv_epoxymask = epoxymask.astype(bool) #inv_epoxymask = np.invert(inv_epoxymask) x = xmin + halfstepsizex OneoverSQR2 = 1.0 / math.sqrt(2) xtransitions = [512, 2 * 512, 3 * 512] if self.gmin is None: gmin = 0 else: gmin = self.gmin if self.gmax is None: gmax = diffim.shape[0] else: gmax = self.gmax if gmax > diffim.shape[0]: gmax = diffim.shape[0] #lists of starting and stopping indexes for the filling later xfills = [] xfille = [] yfills = [] yfille = [] grange = range(gmin, gmax) while x < xmax: # define the x1,x2 of the box for stats x1 = x - int(0.5 * self.boxsizex) if x1 < 0: x1 = 0 if self.forcexylimits and (x1 < xmin): x1 = xmin x2 = int(x + max(1, 0.5 * self.boxsizex)) if x2 > self.data.shape[2]: x2 = self.data.shape[2] if self.forcexylimits and (x2 > xmax): x2 = xmax # make sure that the box contains only data from the same amp!!! for xtransition in xtransitions: if x >= xtransition and x1 < xtransition: x1 = xtransition if x < xtransition and x2 >= xtransition: x2 = xtransition #print('!!!x',x,0.5*self.boxsizex,x1,x2) y = ymin + halfstepsizey while y < ymax: # define the y1,y2 of the box for stats y1 = y - int(0.5 * self.boxsizey) if y1 < 0: y1 = 0 if self.forcexylimits and (y1 < ymin): y1 = ymin y2 = int(y + max(1, 0.5 * self.boxsizey)) if y2 > self.data.shape[2]: y2 = self.data.shape[1] if self.forcexylimits and (y2 > ymax): y2 = ymax if self.verbose: if (x % 64) == 0 and (y == 0): print('(x,y)=(%d,%d) box:%d:%d,%d:%d' % (x, y, x1, x2, y1, y2)) stdevs = [] Nused = [] Pskipped = [] for g in grange: sigmacut.calcaverage_sigmacutloop(diffim[g, y1:y2, x1:x2], mask=mask[y1:y2, x1:x2], Nsigma=3.0, verbose=0) if self.verbose: print('x:%d y:%d g:%d' % (x, y, g), sigmacut.__str__()) if sigmacut.converged and sigmacut.Nused > self.Npixmin and 100.0 * sigmacut.Nskipped / ( sigmacut.Nused + sigmacut.Nskipped) < self.Pclipmax: stdevs.append(sigmacut.stdev) Nused.append(sigmacut.Nused) Pskipped.append(100.0 * sigmacut.Nskipped / (sigmacut.Nused + sigmacut.Nskipped)) if len(stdevs) > 1: sigmacut.calcaverage_sigmacutloop(np.array(stdevs), Nsigma=3.0, verbose=0) if sigmacut.converged: self.im_rdnoise[y, x] = sigmacut.mean * OneoverSQR2 self.im_rdnoise_err[ y, x] = sigmacut.mean_err * OneoverSQR2 if self.verbose: print('x:%d y:%d average' % (x, y), sigmacut.__str__()) im_Nused[y, x] = scipy.median(Nused) im_Pskipped[y, x] = scipy.median(Pskipped) elif len(stdevs) == 1: self.im_rdnoise[y, x] = sigmacut.stdev * OneoverSQR2 self.im_rdnoise_err[y, x] = sigmacut.stdev_err * OneoverSQR2 im_Nused[y, x] = 1 im_Pskipped[y, x] = 0.0 if self.fill: xx1 = x - halfstepsizex for xtransition in xtransitions: if x - stepsizex < xtransition and x >= xtransition: xx1 = xtransition if x - stepsizex < 0: xx1 = 0 if xx1 < x1: xx1 = x1 if xx1 < 0: xx1 = 0 xx2 = x + max(1, halfstepsizex) for xtransition in xtransitions: if x + stepsizex >= xtransition and x < xtransition: xx2 = xtransition if x + stepsizex > self.data.shape[2]: xx2 = self.data.shape[2] if xx2 > x2: xx2 = x2 if xx2 > self.data.shape[2]: xx2 = self.data.shape[2] yy1 = y - halfstepsizey if y - stepsizey < 0: yy1 = 0 if yy1 < y1: yy1 = y1 if yy1 < 0: yy1 = 0 yy2 = y + max(1, halfstepsizey) if y + stepsizey > self.data.shape[1]: yy2 = self.data.shape[1] if yy2 > y2: yy2 = y2 if yy2 > self.data.shape[1]: yy2 = self.data.shape[1] #save the x and y 
coordinates for filling in missing data later xfills.append(xx1) xfille.append(xx2) yfills.append(yy1) yfille.append(yy2) if len(stdevs) > 0: self.im_rdnoise[yy1:yy2, xx1:xx2] = self.im_rdnoise[y, x] self.im_rdnoise_err[yy1:yy2, xx1:xx2] = self.im_rdnoise_err[y, x] im_Nused[yy1:yy2, xx1:xx2] = im_Nused[y, x] im_Pskipped[yy1:yy2, xx1:xx2] = im_Pskipped[y, x] y += stepsizey x += stepsizex #fill in the gaps in the map (i.e. the partial boxes that contain less than the minimum number #of pixels needed for calculating the readnoise for iter in range(self.fill_iterations): #print('BEGINNING FILL ITERATION {}'.format(iter)) self.im_rdnoise = self.fill_empty_regions(self.im_rdnoise, xfills, xfille, yfills, yfille) self.im_rdnoise_err = self.fill_empty_regions( self.im_rdnoise_err, xfills, xfille, yfills, yfille) #mask out the pixels in the low epoxy area if detector in [ 'NRCA1', 'NRCA3', 'NRCA4', 'NRCALONG', 'NRCB1', 'NRCB4' ]: self.im_rdnoise[epoxymask.astype(bool)] = 0. self.im_rdnoise_err[epoxymask.astype(bool)] = 0. #save output file outfilename = self.save_readnoise_file(self.im_rdnoise) #redcat team checks #subprocess.call(['fitsverify',outfilename]) return outfilename, self.im_rdnoise
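# Minimal sketch of the mask union used in make_rdnoise: the bad-pixel mask and
# the (possibly inverted) epoxy mask are combined with logical_or, so a pixel is
# excluded if either mask flags it. Array sizes are toy values, not 2048x2048.
import numpy as np

bpm_mask  = np.array([[0, 1], [0, 0]], dtype=bool)
epoxymask = np.array([[0, 0], [1, 0]], dtype=np.uint8)
mask = np.logical_or(bpm_mask, epoxymask)     # True where either mask is set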
def scale_by_cal(Data, scale_t_ave=True, scale_f_ave=False, sub_med=False, scale_f_ave_mod=False, rotate=False): """Puts all data in units of the cal temperature. Data is put into units of the cal temperature, thus removing dependence on the gain. This can be done by dividing by the time average of the cal (scale_t_ave=True, Default) thus removing dependence on the frequency- dependant gain. Alternatively, you can scale by the frequency average to remove the time-dependent gain (scale_f_ave=True). Data is then in units of the frequency averaged cal temperture. You can also do both (recommended). After some scaling the data ends up in units of the cal temperture as a funciton of frequency. Optionally you can also subtract the time average of the data off here (subtract_time_median), since you might be done with the cal information at this point. """ on_ind = 0 off_ind = 1 if (Data.field['CAL'][on_ind] != 'T' or Data.field['CAL'][off_ind] != 'F'): raise ce.DataError('Cal states not in expected order.') if tuple(Data.field['CRVAL4']) == (-5, -7, -8, -6): # Here we check the polarizations and cal indicies xx_ind = 0 yy_ind = 3 xy_inds = [1, 2] # A bunch of calculations used to test phase closure. Not acctually # relevant to what is being done here. #a = (Data.data[5, xy_inds, on_ind, 15:20] # - Data.data[5, xy_inds, off_ind, 15:20]) #a /= sp.sqrt( Data.data[5, xx_ind, on_ind, 15:20] # - Data.data[5, xx_ind, off_ind, 15:20]) #a /= sp.sqrt( Data.data[5, yy_ind, on_ind, 15:20] # - Data.data[5, yy_ind, off_ind, 15:20]) #print a[0,:]**2 + a[1,:]**2 diff_xx = Data.data[:, xx_ind, on_ind, :] - Data.data[:, xx_ind, off_ind, :] diff_yy = Data.data[:, yy_ind, on_ind, :] - Data.data[:, yy_ind, off_ind, :] if scale_t_ave: # Find the cal means (in time) and scale by them. # Means work much better than medians. Medians seems to bias the # result by up to 10%. This seems to be discretization noise. Cal # switches fast enough that we shouldn't need this anyway. cal_tmed_xx = ma.mean(diff_xx, 0) cal_tmed_yy = ma.mean(diff_yy, 0) cal_tmed_xx[sp.logical_or(cal_tmed_xx <= 0, cal_tmed_yy <= 0)] = ma.masked cal_tmed_yy[cal_tmed_xx.mask] = ma.masked Data.data[:, xx_ind, :, :] /= cal_tmed_xx Data.data[:, yy_ind, :, :] /= cal_tmed_yy Data.data[:, xy_inds, :, :] /= ma.sqrt(cal_tmed_yy * cal_tmed_xx) if scale_f_ave: # The frequency gains have have systematic structure to them, # they are not by any approximation gaussian distributed. Use # means, not medians across frequency. operation = ma.mean cal_fmea_xx = operation(diff_xx, -1) cal_fmea_yy = operation(diff_yy, -1) # Flag data with wierd cal power. Still Experimental. cal_fmea_xx[sp.logical_or(cal_fmea_xx <= 0, cal_fmea_yy <= 0)] = ma.masked cal_fmea_yy[cal_fmea_xx.mask] = ma.masked cal_xx = ma.mean(cal_fmea_xx) cal_yy = ma.mean(cal_fmea_yy) cal_fmea_xx[sp.logical_or( abs(cal_fmea_xx.anom()) >= 0.1 * cal_xx, abs(cal_fmea_yy.anom()) >= 0.1 * cal_yy)] = ma.masked cal_fmea_yy[cal_fmea_xx.mask] = ma.masked ntime = len(cal_fmea_xx) cal_fmea_xx.shape = (ntime, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1) Data.data[:, xx_ind, :, :] /= cal_fmea_xx Data.data[:, yy_ind, :, :] /= cal_fmea_yy cal_fmea_xx.shape = (ntime, 1, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1, 1) Data.data[:, xy_inds, :, :] /= ma.sqrt(cal_fmea_yy * cal_fmea_xx) if scale_f_ave_mod: # The frequency gains have have systematic structure to them, # they are not by any approximation gaussian distributed. Use # means, not medians across frequency. 
operation = ma.mean cal_fmea_xx = operation(diff_xx, -1) cal_fmea_yy = operation(diff_yy, -1) cal_fmea_xx_off = operation(Data.data[:, xx_ind, off_ind, :], -1) cal_fmea_yy_off = operation(Data.data[:, yy_ind, off_ind, :], -1) sys_xx = cal_fmea_xx_off / cal_fmea_xx sys_yy = cal_fmea_yy_off / cal_fmea_yy percent_ok = 0.03 sys_xx_tmed = ma.median(sys_xx) sys_yy_tmed = ma.median(sys_yy) maskbad_xx = (sys_xx > sys_xx_tmed + sys_xx_tmed * percent_ok) | ( sys_xx < sys_xx_tmed - sys_xx_tmed * percent_ok) maskbad_yy = (sys_yy > sys_yy_tmed + sys_yy_tmed * percent_ok) | ( sys_yy < sys_yy_tmed - sys_yy_tmed * percent_ok) cal_fmea_xx[sp.logical_or(cal_fmea_xx <= 0, cal_fmea_yy <= 0)] = ma.masked cal_fmea_yy[cal_fmea_xx.mask] = ma.masked cal_fmea_xx[maskbad_xx] = ma.masked cal_fmea_yy[maskbad_yy] = ma.masked cal_xx = ma.mean(cal_fmea_xx) cal_yy = ma.mean(cal_fmea_yy) ntime = len(cal_fmea_xx) cal_fmea_xx.shape = (ntime, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1) Data.data[:, xx_ind, :, :] /= cal_fmea_xx Data.data[:, yy_ind, :, :] /= cal_fmea_yy cal_fmea_xx.shape = (ntime, 1, 1, 1) cal_fmea_yy.shape = (ntime, 1, 1, 1) Data.data[:, xy_inds, :, :] /= ma.sqrt(cal_fmea_yy * cal_fmea_xx) if scale_f_ave and scale_t_ave: # We have devided out t_cal twice so we need to put one factor back # in. cal_xx = operation(cal_tmed_xx) cal_yy = operation(cal_tmed_yy) Data.data[:, xx_ind, :, :] *= cal_xx Data.data[:, yy_ind, :, :] *= cal_yy Data.data[:, xy_inds, :, :] *= ma.sqrt(cal_yy * cal_xx) if scale_f_ave_mod and scale_t_ave: #Same divide out twice problem. cal_xx = operation(cal_tmed_xx) cal_yy = operation(cal_tmed_yy) Data.data[:, xx_ind, :, :] *= cal_xxcal_imag_mean Data.data[:, yy_ind, :, :] *= cal_yy Data.data[:, xy_inds, :, :] *= ma.sqrt(cal_yy * cal_xx) if scale_f_ave and scale_f_ave_mod: raise ce.DataError("time averaging twice") if rotate: # Define the differential cal phase to be zero and rotate all data # such that this is true. cal_real_mean = ma.mean( Data.data[:, 1, 0, :] - Data.data[:, 1, 1, :], 0) cal_imag_mean = ma.mean( Data.data[:, 2, 0, :] - Data.data[:, 2, 1, :], 0) # Get the cal phase angle as a function of frequency. cal_phase = -ma.arctan2(cal_imag_mean, cal_real_mean) # Rotate such that the cal phase is zero. Imperative to have a # temporary variable. New_data_real = (ma.cos(cal_phase) * Data.data[:, 1, :, :] - ma.sin(cal_phase) * Data.data[:, 2, :, :]) New_data_imag = (ma.sin(cal_phase) * Data.data[:, 1, :, :] + ma.cos(cal_phase) * Data.data[:, 2, :, :]) Data.data[:, 1, :, :] = New_data_real Data.data[:, 2, :, :] = New_data_imag elif tuple(Data.field['CRVAL4']) == (1, 2, 3, 4): # For the shot term, just devide everything by on-off in I. I_ind = 0 cal_I_t = Data.data[:, I_ind, on_ind, :] - Data.data[:, I_ind, off_ind, :] cal_I = ma.mean(cal_I_t, 0) Data.data /= cal_I else: raise ce.DataError("Unsupported polarization states.") # Subtract the time median if desired. if sub_med: Data.data -= ma.median(Data.data, 0)
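# Sketch of the cal-power screening in scale_by_cal: time-averaged cal values that
# are non-positive in either polarization are masked in both, so the later division
# cannot produce negative or infinite gains. The data values are invented.
import numpy as np
import numpy.ma as ma

cal_xx = ma.array([1.2, -0.1, 0.9, 1.1])
cal_yy = ma.array([1.0, 0.8, 0.0, 1.3])
cal_xx[np.logical_or(cal_xx <= 0, cal_yy <= 0)] = ma.masked
cal_yy[cal_xx.mask] = ma.masked               # keep the two masks identical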
def calcSphericalCavityLabels(self, dds): mpidims = dds.mpi.shape if (dds.mpi.comm != None): mpidims = (dds.mpi.comm.Get_size(), 1, 1) dds = mango.copy(dds, mpidims=mpidims) rootLogger.info("Calculating neighbourhood mean image...") fltDds = mango.copy(dds, mtype="tomo_float", halo=self.se.getHaloSize()) nMeanDds = mango.image.mean_filter(fltDds, self.se) nMeanDds = mango.copy(nMeanDds, halo=(0,0,0)) rootLogger.info("Calculating neighbourhood stdd image...") nStddDds = mango.image.stdd_filter(fltDds, self.se) nStddDds = mango.copy(nStddDds, halo=(0,0,0)) del fltDds self.maskLowStddVoxels(dds, nMeanDds, nStddDds) rootLogger.info("Calculating mean vs stdd histogram...") h2d, edges = mango.image.histogramdd([nMeanDds, nStddDds], bins=(1024, 8192)) h2d = h2d[:, :-2] rootLogger.info("h2d.shape = %s, edges[0].shape=%s, edges[1].shape=%s" % (h2d.shape, edges[0].shape, edges[1].shape)) rootLogger.info("Done calculating mean vs stdd histogram...") maxIdx = np.unravel_index(np.argmax(h2d), h2d.shape) rootLogger.info("np.argmax(h2d) = %s" % (maxIdx,)) backgroundMean = 0.5 * (edges[0][maxIdx[0]] + edges[0][maxIdx[0] + 1]) backgroundStdd = 0.5 * (edges[1][maxIdx[1]] + edges[1][maxIdx[1] + 1]) mskDds = mango.copy(dds) rootLogger.info("Background (mean,stdd) = (%s, %s)." % (backgroundMean, backgroundStdd)) mskDds.subd.asarray()[...] = \ sp.where( sp.logical_or( nStddDds.subd.asarray() < (8 * backgroundStdd), nMeanDds.subd.asarray() < (backgroundMean + 3 * backgroundStdd), ), mskDds.mtype.maskValue(), mskDds.subd.asarray() ) del nMeanDds, nStddDds self.writeIntermediateDds("_AaaPrePercentileTailMask", mskDds) self.eliminatePercentileTails(mskDds, 1.0, 92.5) rootLogger.info("Calculating neighbourhood stdd image...") nStddDds = mango.image.stdd_filter(mskDds, self.se) self.eliminatePercentileTails(nStddDds, 33.0, 100.0) rootLogger.info("Copying stdd percentile tail mask to mskDds...") mango.copy_masked(nStddDds, mskDds) rootLogger.info("Done copying stdd percentile tail mask to mskDds.") self.writeIntermediateDds("_AaaPstPercentileTailMask", mskDds) rootLogger.info("Eliminating small clusters from mskDds...") self.eliminateSmallClusters(mskDds, 0.1) rootLogger.info("Done eliminating small clusters from mskDds.") self.writeIntermediateDds("_AaaPstSmallClusterMask", mskDds) del nStddDds segEdtDds = mango.zeros_like(mskDds, mtype="segmented") segEdtDds.asarray()[...] = sp.where(mskDds.asarray() == mskDds.mtype.maskValue(), 0, 1) self.writeIntermediateDds("_AaaPreCvxHullMask", segEdtDds) rootLogger.info("Calculating convex hull...") cvxHullMsk = mango.image.convex_hull_3d(segEdtDds, inputmsk=0, outhull=segEdtDds.mtype.maskValue(), inhull=1) segEdtDds.asarray()[...] = sp.where(cvxHullMsk.asarray() == cvxHullMsk.mtype.maskValue(), 1, segEdtDds.asarray()) rootLogger.info("Done calculating convex hull.") self.writeIntermediateDds("_AaaPstCvxHullMask", segEdtDds) segEdtDds.setFacesToValue(1) rootLogger.info("Calculating EDT image...") edtDds = mango.image.distance_transform_edt(segEdtDds, val=0) self.writeIntermediateDds("_AaaPstCvxHullMaskEdt", edtDds) rootLogger.info("Calculating MCR image...") mcrDds = mango.image.max_covering_radius(edtDds, maxdist=0.5*(np.min(edtDds.shape)), filecache=True) mango.copy_masked(cvxHullMsk, mcrDds) rootLogger.info("Calculating (min,max) MCR values...") mcrMin, mcrMax = mango.minmax(mcrDds) rootLogger.info("Masking small MCR values...") mcrDds.asarray()[...] 
= sp.where(mcrDds.asarray() >= 0.05*mcrMax, mcrDds.asarray(), mcrDds.mtype.maskValue()) self.writeIntermediateDds("_AaaPstCvxHullMaskMcr", mcrDds) del cvxHullMsk, edtDds # # Normalise the intensities so that a relative-gradient is computed for the largest # MCR radii. rootLogger.info("Normalising MCR image...") mnmx = mango.minmax(mcrDds) tmpDds = mango.copy(mcrDds) tmpDds.asarray()[...] -= mnmx[1] tmpDds.asarray()[...] *= tmpDds.asarray() tmpDds.asarray()[...] = 1+mnmx[1]*np.exp(-(tmpDds.asarray())/(2*0.133*0.133*(mnmx[1]*mnmx[1]))) mcrDds.asarray()[...] = sp.where(mcrDds.asarray() > 0, mcrDds.asarray()/tmpDds.asarray(), mcrDds.asarray()) rootLogger.info("Calculating MCR image gradient...") grdMcrDds = mango.image.discrete_gaussian_gradient_magnitude(mcrDds, 0.65, errtol=0.01) grdMcrDds.asarray()[...] = sp.where(grdMcrDds.asarray() <= 3.0e-2, mcrDds.asarray(), mcrDds.mtype.maskValue()) rootLogger.info("Calculating unique MCR low-gradient values...") u = mango.unique(grdMcrDds) rootLogger.info("Converting low gradient MCR image to binary segmentation...") segDds = mango.map_element_values(grdMcrDds, lambda x: x in u, mtype="segmented") rootLogger.info("Labeling low gradient MCR regions...") mango.copy_masked(mcrDds, segDds) lblDds = mango.image.label(segDds, val=1, connectivity=26, dosort=True) self.writeIntermediateDds("_AaaPstCvxHullMaskMcrGrdLbl", lblDds) del segDds, grdMcrDds rootLogger.info("Calculating Principal Moment of Inertia...") self.pmoi, self.pmoi_axes, self.com = mango.image.moment_of_inertia(mskDds) rootLogger.info("Done calculating Principal Moment of Inertia.") return lblDds, mcrDds, segEdtDds
def reduction_T_1(self, I): A = logical_or(I[0:-1:2, :], I[1::2, :]) A = logical_or(A[:, 0:-1:2], A[:, 1::2]) return A
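# Tiny demo of the reduction above: reduction_T_1 is a 2x2 "OR-pooling" downsample,
# True in the output if any pixel of the corresponding 2x2 block is True. Written
# here as a standalone function, assuming an even-sized boolean image.
import numpy as np

def or_pool_2x2(I):
    A = np.logical_or(I[0:-1:2, :], I[1::2, :])     # collapse row pairs
    return np.logical_or(A[:, 0:-1:2], A[:, 1::2])  # collapse column pairs

img = np.array([[0, 0, 1, 0],
                [0, 0, 0, 0],
                [1, 0, 0, 0],
                [0, 0, 0, 0]], dtype=bool)
print(or_pool_2x2(img))   # [[False  True], [ True False]]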
# import data considers only cagemates that are both genotypes and phenotyped in_file = '/Users/casale/Desktop/rat/dirIndirVD/data/HSrats_noHaplotypes.hdf5' f = h5py.File(in_file,'r') # get sample ID geno_sampleID = f['kinships']['genotypes_IBS']['cols_subjects']['outbred'][:] sampleID = f['phenotypesNcovariates']['rows_subjects']['outbred'][:] has_geno = sp.array([sampleID[i] in geno_sampleID for i in range(sampleID.shape[0])]) # read trait and covariantes trait = 'Distance0_30_bc' measures = f['phenotypesNcovariates']['cols_measures']['measures'][:] Ip = measures==trait covs = f['phenotypesNcovariates']['cols_measures']['covariates2use'][Ip][0].split(',') Ic = sp.zeros(Ip.shape[0],dtype=bool) for cov in covs: Ic = sp.logical_or(Ic,measures==cov) Y = f['phenotypesNcovariates']['array'][Ip,:].T covs = f['phenotypesNcovariates']['array'][Ic,:].T Is = sp.logical_and((covs!=-999).all(1),Y[:,0]!=-999) Is = sp.logical_and(has_geno,Is) Y = Y[Is,:]; covs = covs[Is,:] cage = f['phenotypesNcovariates']['rows_subjects']['cage'][Is] sampleID = sampleID[Is] # normalize pheno (not needed if not for numerical stability) Y-=Y.mean(0) Y/=Y.std(0) # grab kinship idxs = sp.array([sp.where(geno_sampleID==sampleID[i])[0][0] for i in range(sampleID.shape[0])]) kinship = f['kinships']['genotypes_IBS']['array'][:][idxs][:,idxs]
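# Small sketch of the covariate selection above: a boolean mask over the 'measures'
# axis is built up by OR-ing one equality test per covariate name. The names below
# are placeholders, not the actual HDF5 contents.
import numpy as np

measures = np.array(['Distance0_30_bc', 'sex', 'batch', 'weight'])
covs = ['sex', 'batch']
Ic = np.zeros(measures.shape[0], dtype=bool)
for cov in covs:
    Ic = np.logical_or(Ic, measures == cov)
print(Ic)        # [False  True  True False]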
def resample_to_shape(source_file, region, sp_res, grid, prefix=None, nan_value=None, dest_nan_value=None, variables=None, shapefile=None): """ Resamples images and clips country boundaries Parameters ---------- source_file : str Path to source file. region : str Identifier of the region in the shapefile. If the default shapefile is used, this would be the FIPS country code. sp_res : int or float Spatial resolution of the shape-grid. grid : poets.grid.RegularGrid or poets.grid.ShapeGrid Grid to resample data to. prefix : str, optional Prefix for the variable in the NetCDF file, should be name of source nan_value : int, float, optional Not a number value of the original data as given by the data provider dest_nan_value : int or float, optional NaN value used in the final NetCDF file. variables : list of str, optional Variables to resample from original file. shapefile : str, optional Path to shape file, uses "world country admin boundary shapefile" by default. Returns ------- res_data : dict of numpy.arrays resampled image dest_lon : numpy.array longitudes of the points in the resampled image dest_lat : numpy.array latitudes of the points in the resampled image gpis : numpy.array grid point indices timestamp : datetime.date date of the image metadata : dict Metadata derived from input file. """ if prefix is not None: prefix += '_' fileExtension = os.path.splitext(source_file)[1].lower() if region == 'global': lon_min = -180 lon_max = 180 lat_min = -90 lat_max = 90 else: shp = Shape(region, shapefile) lon_min = shp.bbox[0] lon_max = shp.bbox[2] lat_min = shp.bbox[1] lat_max = shp.bbox[3] if fileExtension in ['.nc', '.nc3', '.nc4']: data_src, lon, lat, timestamp, metadata = nc.read_image(source_file, variables) if (lon_min >= lon.max() or lon_max <= lon.min() or lat_max <= lat.min() or lat_min >= lat.max()): return "No data" data, src_lon, src_lat = nc.clip_bbox(data_src, lon, lat, lon_min, lat_min, lon_max, lat_max) elif fileExtension in ['.h5']: data_src, lon, lat, timestamp, metadata = h5.read_image(source_file, variables) if (lon_min >= lon.max() or lon_max <= lon.min() or lat_max <= lat.min() or lat_min >= lat.max()): return "No data" data, src_lon, src_lat = nc.clip_bbox(data_src, lon, lat, lon_min, lat_min, lon_max, lat_max) elif fileExtension in imgfiletypes: data, src_lon, src_lat, timestamp, metadata = bbox_img(source_file, region, fileExtension, shapefile) if nan_value is not None: for key in data.keys(): data[key] = np.ma.array(data[key], mask=(data[key] == nan_value)) src_lon, src_lat = np.meshgrid(src_lon, src_lat) lons = grid.arrlon[0:grid.shape[0]] dest_lon, dest_lat = np.meshgrid(lons, np.unique(grid.arrlat)[::-1]) gpis = grid.get_bbox_grid_points(grid.arrlat.min(), grid.arrlat.max(), grid.arrlon.min(), grid.arrlon.max()) search_rad = 180000 * sp_res data = resample.resample_to_grid(data, src_lon, src_lat, dest_lon, dest_lat, search_rad=search_rad) res_data = {} path = [] if region != 'global': _, _, multipoly = shp._get_shape() for ring in multipoly: poly_verts = list(ring.exterior.coords) path.append(matplotlib.path.Path(poly_verts)) coords = [grid.arrlon, grid.arrlat[::-1]] coords2 = np.zeros((len(coords[0]), 2)) for idx in range(0, len(coords[0])): coords2[idx] = [coords[0][idx], coords[1][idx]] mask_old = path[0].contains_points(coords2) for key in data.keys(): if variables is not None: if key not in variables: del metadata[key] continue if region != 'global': for ring in path: mask_new = (ring.contains_points(coords2)) mask_rev = scipy.logical_or(mask_old, mask_new) 
mask_old = mask_rev mask_rev = mask_rev.reshape(dest_lon.shape) mask = np.invert(mask_rev) mask[data[key].mask == True] = True else: mask = data[key].mask if prefix is None: var = key else: var = prefix + key if metadata is not None: metadata[var] = metadata[key] if var != key: del metadata[key] res_data[var] = np.ma.masked_array(data[key], mask=np.copy(mask), fill_value=dest_nan_value) dat = np.copy(res_data[var].data) dat[mask == True] = dest_nan_value res_data[var] = np.ma.masked_array(dat, mask=np.copy(mask), fill_value=dest_nan_value) return res_data, dest_lon, dest_lat, gpis, timestamp, metadata
def invertRSTO(RSTO,Iono,alpha_list=1e-2,invtype='tik',rbounds=[100,200],Nlin=0): """ This will run the inversion program given an ionocontainer, an alpha and """ nlout,ntout,nl=Iono.Param_List.shape if Nlin !=0: nl=Nlin nlin=len(RSTO.Cart_Coords_In) time_out=RSTO.Time_Out time_in=RSTO.Time_In overlaps = RSTO.overlaps xin,yin,zin=RSTO.Cart_Coords_In.transpose() z_u=sp.unique(zin) rplane=sp.sqrt(xin**2+yin**2)*sp.sign(xin) r_u=sp.unique(rplane) n_z=z_u.size n_r=r_u.size dims= [n_r,n_z] rin,azin,elin=RSTO.Sphere_Coords_In.transpose() anglist=RSTO.simparams['angles'] ang_vec=sp.array([[i[0],i[1]] for i in anglist]) # trim out cruft zmin,zmax=[150,500] rpmin,rpmax=rbounds#[-50,100]#[100,200] altlog= sp.logical_and(zin>zmin,zin<zmax) rplog=sp.logical_and(rplane>rpmin,rplane<rpmax) allrng= RSTO.simparams['Rangegatesfinal'] dR=allrng[1]-allrng[0] nldir=sp.ceil(int(nl)/2.) posang_log1= sp.logical_and(ang_vec[:,0]<=180.,ang_vec[:,0]>=0) negang_log1 = sp.logical_or(ang_vec[:,0]>180.,ang_vec[:,0]<0) azin_pos = sp.logical_and(azin<=180.,azin>=0) azin_neg = sp.logical_or(azin>180.,azin<0) minangpos=0 minangneg=0 if sp.any(posang_log1): minangpos=ang_vec[posang_log1,1].min() if sp.any(negang_log1): minangneg=ang_vec[negang_log1,1].min() rngbounds=[allrng[0]-nldir*dR,allrng[-1]+nldir*dR] rng_log=sp.logical_and(rin>rngbounds[0],rin<rngbounds[1]) elbounds_pos=sp.logical_and(azin_pos,elin>minangpos) elbounds_neg=sp.logical_and(azin_neg,elin>minangneg) elbounds=sp.logical_or(elbounds_pos,elbounds_neg) keeplog=sp.logical_and(sp.logical_and(rng_log,elbounds),sp.logical_and(altlog,rplog)) keeplist=sp.where(keeplog)[0] nlin_red=len(keeplist) # set up derivative matrix dx,dy=diffmat(dims) dx_red=dx[keeplist][:,keeplist] dy_red=dy[keeplist][:,keeplist] # need the sparse vstack to make srue things stay sparse D=sp.sparse.vstack((dx_red,dy_red)) # New parameter matrix new_params=sp.zeros((nlin,len(time_out),nl),dtype=Iono.Param_List.dtype) if isinstance(alpha_list,numbers.Number): alpha_list=[alpha_list]*nl ave_datadif=sp.zeros((len(time_out),nl)) ave_data_const = sp.zeros_like(ave_datadif) q=1e10 for itimen, itime in enumerate(time_out): print('Making Outtime {0:d} of {1:d}'.format(itimen+1,len(time_out))) #allovers=overlaps[itimen] #curintimes=[i[0] for i in allovers] #for it_in_n,it in enumerate(curintimes): #print('\t Making Intime {0:d} of {1:d}'.format(it_in_n+1,len(curintimes))) #A=RSTO.RSTMat[itimen*nlout:(itimen+1)*nlout,it*nlin:(it+1)*nlin] A=RSTO.RSTMat[itimen*nlout:(itimen+1)*nlout,itimen*nlin:(itimen+1)*nlin] Acvx=cvx.Constant(A[:,keeplist]) for ip in range(nl): alpha=alpha_list[ip]*2 print('\t\t Making Lag {0:d} of {1:d}'.format(ip+1,nl)) datain=Iono.Param_List[:,itimen,ip] xr=cvx.Variable(nlin_red) xi=cvx.Variable(nlin_red) if invtype.lower()=='tik': constr=alpha*cvx.norm(xr,2) consti=alpha*cvx.norm(xi,2) elif invtype.lower()=='tikd': constr=alpha*cvx.norm(D*xr,2) consti=alpha*cvx.norm(D*xi,2) elif invtype.lower()=='tv': constr=alpha*cvx.norm(D*xr,1) consti=alpha*cvx.norm(D*xi,1) br=datain.real/q bi=datain.imag/q if ip==0: objective=cvx.Minimize(cvx.norm(Acvx*xr-br,2)+constr) constraints= [xr>=0] prob=cvx.Problem(objective) result=prob.solve(verbose=True,solver=cvx.SCS,use_indirect=True,max_iters=4000) # new_params[keeplog,it,ip]=xr.value.flatten() xcomp=sp.array(xr.value).flatten()*q else: objective=cvx.Minimize(cvx.norm(Acvx*xr-br,2)+constr) prob=cvx.Problem(objective) result=prob.solve(verbose=True,solver=cvx.SCS,use_indirect=True,max_iters=4000) 
objective=cvx.Minimize(cvx.norm(Acvx*xi-bi,2)+consti) prob=cvx.Problem(objective) result=prob.solve(verbose=True,solver=cvx.SCS,use_indirect=True,max_iters=4000) xcomp=sp.array(xr.value + 1j*xi.value).flatten()*q # new_params[keeplog,it,ip]=xcomp new_params[keeplog,itimen,ip]=xcomp ave_datadif[itimen,ip]=sp.sqrt(sp.nansum(sp.absolute(A[:,keeplist].dot(xcomp)-datain)**2)) if invtype.lower()=='tik': sumconst=sp.sqrt(sp.nansum(sp.power(sp.absolute(xcomp),2))) elif invtype.lower()=='tikd': dx=D.dot(xcomp) sumconst=sp.sqrt(sp.nansum(sp.power(sp.absolute(dx),2))) elif invtype.lower()=='tv': dx=D.dot(xcomp) sumconst=sp.nansum(sp.absolute(dx)) ave_data_const[itimen,ip]=sumconst # set up nans new_params[sp.logical_not(keeplog),itimen]=sp.nan datadif=sp.nanmean(ave_datadif,axis=0) constval=sp.nanmean(ave_data_const,axis=0) ionoout=IonoContainer(coordlist=RSTO.Cart_Coords_In,paramlist=new_params,times = time_out,sensor_loc = sp.zeros(3),ver =0,coordvecs = ['x','y','z'],paramnames=Iono.Param_Names[:Nlin]) return (ionoout,datadif,constval)
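# Hedged sketch of the angle bookkeeping in invertRSTO: grid points are kept when
# their azimuth falls on one side (0-180 deg, or the rest) *and* their elevation
# clears that side's minimum beam elevation; the two sides are then OR-ed together.
# The angles below are arbitrary test values.
import numpy as np

azin = np.array([10.0, 170.0, 200.0, 350.0])
elin = np.array([40.0, 20.0, 50.0, 35.0])
minangpos, minangneg = 30.0, 45.0
azin_pos = np.logical_and(azin <= 180.0, azin >= 0.0)
azin_neg = np.logical_or(azin > 180.0, azin < 0.0)
elbounds = np.logical_or(np.logical_and(azin_pos, elin > minangpos),
                         np.logical_and(azin_neg, elin > minangneg))
print(elbounds)   # [ True False  True False]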
def dirFilter2D(mSize, nBands, flag=0):
    # 'flag' selects the sector shape (0: two mirrored triangular wedges,
    # otherwise a rectangle); it was referenced but never defined in the
    # original code, so it is exposed here as a keyword argument.
    filts = []
    dirs = np.zeros((2, nBands), np.float)
    theta = np.array(range(nBands)) * math.pi / nBands
    rho = np.ones(nBands)
    X, Y = cv.polarToCart(rho, theta)
    #X=np.cos(theta)
    #Y=np.sin(theta)
    dirs[0, :] = X.transpose()
    dirs[1, :] = Y.transpose()
    for k in np.array(range(nBands), np.float):
        ang1 = (k - 0.5) * math.pi / nBands
        ang2 = (k + 0.5) * math.pi / nBands
        theta = np.array([ang1, ang2, ang1, ang2, ang1], float)
        if flag == 0:
            # triangular section generation
            Ang1 = k * math.pi / nBands
            Ang2 = (k + 1) * math.pi / nBands
            Theta = np.array([Ang1, Ang2], float)
            Rho1 = np.array([1, 1], float) * math.floor(mSize / 2)
            # xCor,yCor=cv.polarToCart(Rho,Theta)
            x = Rho1 * np.cos(Theta) + math.ceil(mSize / 2)
            y = Rho1 * np.sin(Theta) + math.ceil(mSize / 2)
            Mask1 = np.zeros((mSize, mSize), np.float)
            polyVerticesTemp = np.array(np.round([[x[0], y[0]], [x[1], y[1]], [mSize / 2, mSize / 2]]), np.int32)
            Mask1 = cv.fillConvexPoly(Mask1, polyVerticesTemp, 1)
            Rho2 = np.array([-1, -1], float) * math.floor(mSize / 2)
            x = Rho2 * np.cos(Theta) + math.ceil(mSize / 2)
            y = Rho2 * np.sin(Theta) + math.ceil(mSize / 2)
            Mask2 = np.zeros((mSize, mSize), np.float)
            polyVerticesTemp = np.array(np.round([[x[0], y[0]], [x[1], y[1]], [mSize / 2, mSize / 2]]), np.int32)
            Mask2 = cv.fillConvexPoly(Mask2, polyVerticesTemp, 1)
            # combine the two opposite wedges into one directional mask
            Mask = sc.logical_or(Mask1, Mask2)
            Mask = Mask.astype(float)
            plt.imshow(Mask)
            N = np.float(cv.countNonZero(Mask))
            plt.title(N)
            plt.show()
        else:
            # rectangle generation
            rho = np.array([1, 1, -1, -1, 1], float) * (mSize / 2)
            X, Y = cv.polarToCart(rho, theta)
            #X=np.cos(theta)*rho
            #Y=np.sin(theta)*rho
            # X=X+math.ceil(mSize/2)
            # Y=Y+math.ceil(mSize/2)
            X = np.round(X + mSize / 2)
            Y = np.round(Y + mSize / 2)
            Mask = np.zeros((mSize, mSize), np.float)
            polyVerticesTemp = np.array([X, Y], np.int32)
            polyVertices = polyVerticesTemp.reshape(2, 5)
            polyVerticesNew = polyVertices.transpose()
            Mask = cv.fillConvexPoly(Mask, polyVerticesNew, 1)
            plt.imshow(Mask)
            N = np.float(cv.countNonZero(Mask))
            plt.title(N)
            plt.show()
        filts.append(Mask / N)
        filts.append(Mask)
    return filts, dirs
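# Minimal sketch of the two-triangle wedge construction used above: two filled
# triangles on opposite sides of the centre are combined with logical_or into a
# single directional mask. The geometry here is a fixed toy example rather than
# the per-band loop of dirFilter2D.
import numpy as np
import cv2 as cv

mSize = 9
centre = [mSize // 2, mSize // 2]
tri1 = np.array([[8, 4], [8, 8], centre], dtype=np.int32)   # one wedge
tri2 = np.array([[0, 4], [0, 0], centre], dtype=np.int32)   # its mirror
m1 = np.zeros((mSize, mSize), np.uint8)
cv.fillConvexPoly(m1, tri1, 1)
m2 = np.zeros((mSize, mSize), np.uint8)
cv.fillConvexPoly(m2, tri2, 1)
mask = np.logical_or(m1, m2).astype(float)
mask /= np.count_nonzero(mask)            # normalise so the filter sums to 1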