def map_to_slit(fname, clip=0.0, gamma=1.0): """take all values from a map over clip, compute best slit for PV Slice """ ia = taskinit.iatool() ia.open(fname) imshape = ia.shape() pix = ia.getchunk().squeeze() # this should now be a numpy pix[ix][iy] map pixmax = pix.max() pixrms = pix.std() if False: pix1 = pix.flatten() rpix = stats.robust(pix1) logging.debug("stats: mean: %g %g" % (pix1.mean(), rpix.mean())) logging.debug("stats: rms: %g %g" % (pix1.std(), rpix.std())) logging.debug("stats: max: %g %g" % (pix1.max(), rpix.max())) logging.debug('shape: %s %s %s' % (str(pix.shape), str(pix1.shape), str(imshape))) ia.close() nx = pix.shape[0] ny = pix.shape[1] x = np.arange(pix.shape[0]).reshape((nx, 1)) y = np.arange(pix.shape[1]).reshape((1, ny)) if clip > 0.0: nmax = nx * ny clip = clip * pixrms logging.debug("Using initial clip=%g for rms=%g" % (clip, pixrms)) m = ma.masked_less(pix, clip) while m.count() == 0: clip = 0.5 * clip logging.debug("no masking...trying lower clip=%g" % clip) m = ma.masked_less(pix, clip) else: logging.debug("Clip=%g now found %d/%d points" % (clip, m.count(), nmax)) else: #@ todo sigma-clipping with iterations? see also astropy.stats.sigma_clip() rpix = stats.robust(pix.flatten()) r_mean = rpix.mean() r_std = rpix.std() logging.info("ROBUST MAP mean/std: %f %f" % (r_mean, r_std)) m = ma.masked_less(pix, -clip * r_std) logging.debug("Found > clip=%g : %g" % (clip, m.count())) if m.count() == 0: logging.warning("Returning a dummy slit, no points above clip %g" % clip) edge = 3.0 #slit = [edge,0.5*ny,nx-1.0-edge,0.5*ny] # @todo file a bug, this failed # RuntimeError: (/var/rpmbuild/BUILD/casa-test/casa-test-4.5.7/code/imageanalysis/ImageAnalysis/PVGenerator.cc : 334) Failed AlwaysAssert abs( (endPixRot[0] - startPixRot[0]) - sqrt(xdiff*xdiff + ydiff*ydiff) ) < 1e-6 slit = [edge, 0.5 * ny - 0.1, nx - 1.0 - edge, 0.5 * ny + 0.1] else: slit = convert_to_slit(m, x, y, nx, ny, gamma) return (slit, clip)
def run(self): """Runs the task. Parameters ---------- None Returns ------- None """ self._summary = {} pvslicesummary = [] sumslicetype = 'slice' sliceargs = [] dt = utils.Dtime("PVSlice") # import here, otherwise sphinx cannot parse from impv import impv from imsmooth import imsmooth pvslice = self.getkey('slice') # x_s,y_s,x_e,y_e (start and end of line) pvslit = self.getkey('slit') # x_c,y_c,len,pa (center, length and PA of line) # BDP's used : # b10 = input BDP # b11 = input BDP (moment) # b12 = input BDP (new style cubestats w/ maxpos) # b2 = output BDP b10 = self._bdp_in[0] # input SpwCube fin = b10.getimagefile(bt.CASA) # input name b11 = self._bdp_in[1] # b12 = self._bdp_in[2] clip = self.getkey('clip') # clipping to data for Moment-of-Inertia gamma = self.getkey('gamma') # gamma factor to data for Moment-of-Inertia if b11 != None and len(pvslice) == 0 and len(pvslit) == 0: # if a map (e.g. cubesum ) given, and no slice/slit, get a best pvslice from that (pvslice,clip) = map_to_slit(self.dir(b11.getimagefile(bt.CASA)),clip=clip,gamma=gamma) elif b12 != None and len(pvslice) == 0 and len(pvslit) == 0: # PPP doesn't seem to work too well yet logging.debug("testing new slice computation from a PPP") max = b12.table.getColumnByName("max") maxposx = b12.table.getColumnByName("maxposx") maxposy = b12.table.getColumnByName("maxposy") if maxposx == None: raise Exception,"PPP was not enabled in your CubeStats" (pvslice,clip) = tab_to_slit([maxposx,maxposy,max],clip=clip,gamma=gamma) sliceargs = deepcopy(pvslice) if len(sliceargs)==0: logging.warning("no slice for plot yet") # ugh, this puts single quotes around the numbers formattedslice = str(["%.2f" % a for a in sliceargs]) taskargs = "slice="+formattedslice dt.tag("slice") pvname = self.mkext(fin,'pv') # output image name b2 = PVSlice_BDP(pvname) self.addoutput(b2) width = self.getkey('width') # @todo also: "4arcsec" (can't work since it's a single keyword) if len(pvslice) == 4: start = pvslice[:2] # @todo also allow: ["14h20m20.5s","-30d45m25.4s"] end = pvslice[2:] impv(self.dir(fin), self.dir(pvname),"coords",start=start,end=end,width=width,overwrite=True) elif len(pvslit) == 4: sumslicetype = 'slit' sliceargs = deepcopy(pvslit) formattedslice = str(["%.2f" % a for a in sliceargs]) taskargs = "slit="+formattedslice # length="40arcsec" same as {"value": 40, "unit": "arcsec"}) center = pvslit[:2] # @todo also: ["14h20m20.5s","-30d45m25.4s"]. length = pvslit[2] # @todo also: "40arcsec", {"value": 40, "unit": "arcsec"}) if type(pvslit[3]) is float or type(pvslit[3]) is int: pa = "%gdeg" % pvslit[3] else: pa = pvslit[3] impv(self.dir(fin), self.dir(pvname),"length",center=center,length=length,pa=pa,width=width,overwrite=True) else: raise Exception,"no valid input slit= or slice= or bad Moment_BDP input" sliceargs.append(width) taskargs = taskargs + " width=%d" % width dt.tag("impv") smooth = self.getkey('pvsmooth') if len(smooth) > 0: if len(smooth) == 1: smooth.append(smooth[0]) major = '%dpix' % smooth[0] minor = '%dpix' % smooth[1] logging.info("imsmooth PV slice: %s %s" % (major,minor)) imsmooth(self.dir(pvname), outfile=self.dir(pvname)+'.smooth',kernel='boxcar',major=major,minor=minor) dt.tag("imsmooth") # utils.rename(self.dir(pvname)+'.smooth',self.dir(pvname)) # @todo we will keep the smooth PVslice for inspection, no further flow work # get some statistics data = casautil.getdata_raw(self.dir(pvname)) rpix = stats.robust(data.flatten()) r_mean = rpix.mean() r_std = rpix.std() r_max = rpix.max() logging.info("PV stats: mean/std/max %f %f %f" % (r_mean, r_std, r_max)) logging.regression("PVSLICE: %f %f %f" % (r_mean, r_std, r_max)) myplot = APlot(ptype=self._plot_type,pmode=self._plot_mode,abspath=self.dir()) # hack to get a slice on a mom0map # @todo if pmode is not png, can viewer handle this? figname = pvname + ".png" slicename = self.dir(figname) overlay = "pvoverlay" if b11 != None: f11 = b11.getimagefile(bt.CASA) taskinit.tb.open(self.dir(f11)) data = taskinit.tb.getcol('map') nx = data.shape[0] ny = data.shape[1] taskinit.tb.close() d1 = np.flipud(np.rot90 (data.reshape((nx,ny)))) if len(pvslice) == 4: segm = [[pvslice[0],pvslice[2],pvslice[1],pvslice[3]]] pa = np.arctan2(pvslice[2]-pvslice[0],pvslice[1]-pvslice[3])*180.0/np.pi title = "PV Slice location : slice PA=%.1f" % pa elif len(pvslit) == 4: # can only do this now if using pixel coordinates xcen = pvslit[0] ycen = ny-pvslit[1]-1 slen = pvslit[2] pard = pvslit[3]*np.pi/180.0 cosp = np.cos(pard) sinp = np.sin(pard) halflen = 0.5*slen segm = [[xcen-halflen*sinp,xcen+halflen*sinp,ycen-halflen*cosp,ycen+halflen*cosp]] pa = pvslit[3] title = "PV Slice location : slit PA=%g" % pa else: # bogus, some error in pvslice logging.warning("bogus segm since pvslice=%s" % str(pvslice)) segm = [[10,20,10,20]] pa = -999.999 title = "PV Slice location - bad PA" logging.info("MAP1 segm %s %s" % (str(segm),str(pvslice))) if d1.max() < clip: logging.warning("datamax=%g, clip=%g" % (d1.max(), clip)) title = title + ' (no signal over %g?)' % clip myplot.map1(d1,title,overlay,segments=segm,thumbnail=True) else: myplot.map1(d1,title,overlay,segments=segm,range=[clip],thumbnail=True) dt.tag("plot") overlayname = myplot.getFigure(figno=myplot.figno,relative=True) overlaythumbname = myplot.getThumbnail(figno=myplot.figno,relative=True) Qover = True else: Qover = False implot = ImPlot(pmode=self._plot_mode,ptype=self._plot_type,abspath=self.dir()) implot.plotter(rasterfile=pvname, figname=pvname, colorwedge=True) thumbname = implot.getThumbnail(figno=implot.figno,relative=True) figname = implot.getFigure(figno=implot.figno,relative=True) if False: # debug: # # @todo tmp1 is ok, tmp2 is not displaying the whole thing # old style: viewer() seems to plot full image, but imview() wants square pixels? casa.viewer(infile=self.dir(pvname), outfile=self.dir('tmp1.pv.png'), gui=False, outformat="png") casa.imview(raster={'file':self.dir(pvname), 'colorwedge' : True, 'scaling':-1}, axes={'y':'Declination'}, out=self.dir('tmp2.pv.png')) # # -> this one works, axes= should be correct # imview(raster={'file':'x.pv', 'colorwedge' : True, 'scaling':-1},axes={'y':'Frequency'}) # # @TODO big fixme, we're going to reuse 'tmp1.pv.png' because implot give a broken view figname = 'tmp1.pv.png' # @todo technically we don't know what map it was overlay'd on.... CubeSum/Moment0 overlaycaption = "Location of position-velocity slice overlaid on a CubeSum map" pvcaption = "Position-velocity diagram through emission centroid" pvimage = Image(images={bt.CASA : pvname, bt.PNG : figname},thumbnail=thumbname,thumbnailtype=bt.PNG, description=pvcaption) b2.setkey("image",pvimage) b2.setkey("mean",float(r_mean)) b2.setkey("sigma",float(r_std)) if Qover: thispvsummary = [sumslicetype,sliceargs,figname,thumbname,pvcaption,overlayname,overlaythumbname,overlaycaption,pvname,fin] else: thispvsummary = [sumslicetype,sliceargs,figname,thumbname,pvcaption,pvname,fin] # Yes, this is a nested list. Against the day when PVSLICE can # compute multiple slices per map. pvslicesummary.append(thispvsummary) self._summary["pvslices"] = SummaryEntry(pvslicesummary,"PVSlice_AT",self.id(True),taskargs) dt.tag("done") dt.end()
def run(self): """Runs the task. Parameters ---------- None Returns ------- None """ self._summary = {} dt = utils.Dtime("CubeStats") #maxvrms = 2.0 # maximum variation in rms allowed (hardcoded for now) #maxvrms = -1.0 # turn maximum variation in rms allowed off maxvrms = self.getkey("maxvrms") psample = -1 psample = self.getkey("psample") # BDP's used : # b1 = input BDP # b2 = output BDP b1 = self._bdp_in[0] fin = b1.getimagefile(bt.CASA) bdp_name = self.mkext(fin,'cst') b2 = CubeStats_BDP(bdp_name) self.addoutput(b2) # PeakPointPlot use_ppp = self.getkey("ppp") # peakstats: not enabled for mortal users yet # peakstats = (psample=1, numsigma=4, minchan=3, maxgap=2, peakfit=False) pnumsigma = 4 minchan = 3 maxgap = 2 peakfit = False # True will enable a true gaussian fit # numsigma: adding all signal > numsigma ; not user enabled; for peaksum. numsigma = -1.0 numsigma = 3.0 # grab the new robust statistics. If this is used, 'rms' will be the RMS, # else we will use RMS = 1.4826*MAD (MAD does a decent job on outliers as well) # and was the only method available before CASA 4.4 when robust was implemented robust = self.getkey("robust") rargs = casautil.parse_robust(robust) nrargs = len(rargs) if nrargs == 0: sumrargs = "medabsdevmed" # for the summary, indicate the default robust else: sumrargs = str(rargs) self._summary["rmsmethd"] = SummaryEntry([sumrargs,fin],"CubeStats_AT",self.id(True)) #@todo think about using this instead of putting 'fin' in all the SummaryEntry #self._summary["casaimage"] = SummaryEntry(fin,"CubeStats_AT",self.id(True)) # extra CASA call to get the freq's in GHz, as these are not in imstat1{} # @todo what if the coordinates are not in FREQ ? # Note: CAS-7648 bug on 3D cubes if False: # csys method ia.open(self.dir(fin)) csys = ia.coordsys() spec_axis = csys.findaxisbyname("spectral") # ieck, we need a valid position, or else it will come back and "Exception: All selected pixels are masked" #freqs = ia.getprofile(spec_axis, region=rg.box([0,0],[0,0]))['coords']/1e9 #freqs = ia.getprofile(spec_axis)['coords']/1e9 freqs = ia.getprofile(spec_axis,unit="GHz")['coords'] dt.tag("getprofile") else: # old imval method #imval0 = casa.imval(self.dir(fin),box='0,0,0,0') # this fails on 3D imval0 = casa.imval(self.dir(fin)) freqs = imval0['coords'].transpose()[2]/1e9 dt.tag("imval") nchan = len(freqs) chans = np.arange(nchan) # call CASA to get what we want # imstat0 is the whole cube, imstat1 the plane based statistics # warning: certain robust stats (**rargs) on the whole cube are going to be very slow dt.tag("start") imstat0 = casa.imstat(self.dir(fin), logfile=self.dir('imstat0.logfile'),append=False,**rargs) dt.tag("imstat0") imstat1 = casa.imstat(self.dir(fin),axes=[0,1],logfile=self.dir('imstat1.logfile'),append=False,**rargs) dt.tag("imstat1") # imm = casa.immoments(self.dir(fin),axis='spec', moments=8, outfile=self.dir('ppp.im')) if nrargs > 0: # need to get the peaks without rubust imstat10 = casa.imstat(self.dir(fin), logfile=self.dir('imstat0.logfile'),append=True) dt.tag("imstat10") imstat11 = casa.imstat(self.dir(fin),axes=[0,1],logfile=self.dir('imstat1.logfile'),append=True) dt.tag("imstat11") # grab the relevant plane-based things from imstat1 if nrargs == 0: mean = imstat1["mean"] sigma = imstat1["medabsdevmed"]*1.4826 # see also: astropy.stats.median_absolute_deviation() peakval = imstat1["max"] minval = imstat1["min"] else: mean = imstat1["mean"] sigma = imstat1["rms"] peakval = imstat11["max"] minval = imstat11["min"] if True: # work around a bug in imstat(axes=[0,1]) for last channel [CAS-7697] for i in range(len(sigma)): if sigma[i] == 0.0: minval[i] = peakval[i] = 0.0 # too many variations in the RMS ? sigma_pos = sigma[np.where(sigma>0)] smin = sigma_pos.min() smax = sigma_pos.max() logging.info("sigma varies from %f to %f; %d/%d channels ok" % (smin,smax,len(sigma_pos),len(sigma))) if maxvrms > 0: if smax/smin > maxvrms: cliprms = smin * maxvrms logging.warning("sigma varies too much, going to clip to %g (%g > %g)" % (cliprms, smax/smin, maxvrms)) sigma = np.where(sigma < cliprms, sigma, cliprms) # @todo (and check again) for foobar.fits all sigma's became 0 when robust was selected # was this with mask=True/False? # PeakPointPlot (can be expensive, hence the option) if use_ppp: logging.info("Computing MaxPos for PeakPointPlot") xpos = np.zeros(nchan) ypos = np.zeros(nchan) peaksum = np.zeros(nchan) ia.open(self.dir(fin)) for i in range(nchan): if sigma[i] > 0.0: plane = ia.getchunk(blc=[0,0,i,-1],trc=[-1,-1,i,-1],dropdeg=True) v = ma.masked_invalid(plane) v_abs = np.absolute(v) max = np.unravel_index(v_abs.argmax(), v_abs.shape) xpos[i] = max[0] ypos[i] = max[1] if numsigma > 0.0: peaksum[i] = ma.masked_less(v,numsigma * sigma[i]).sum() peaksum = np.nan_to_num(peaksum) # put 0's where nan's are found ia.close() dt.tag("ppp") nzeros = len(np.where(sigma<=0.0)) if nzeros > 0: zeroch = np.where(sigma<=0.0) logging.warning("There are %d fully masked channels (%s)" % (nzeros,str(zeroch))) # construct the admit Table for CubeStats_BDP # note data needs to be a tuple, later to be column_stack'd if use_ppp: labels = ["channel" ,"frequency" ,"mean" ,"sigma" ,"max" ,"maxposx" ,"maxposy" ,"min", "peaksum"] units = ["number" ,"GHz" ,"Jy/beam" ,"Jy/beam" ,"Jy/beam" ,"number" ,"number" ,"Jy/beam", "Jy"] data = (chans ,freqs ,mean ,sigma ,peakval ,xpos ,ypos ,minval, peaksum) else: labels = ["channel" ,"frequency" ,"mean" ,"sigma" ,"max" ,"min"] units = ["number" ,"GHz" ,"Jy/beam" ,"Jy/beam" ,"Jy/beam" ,"Jy/beam"] data = (chans ,freqs ,mean ,sigma ,peakval ,minval) table = Table(columns=labels,units=units,data=np.column_stack(data)) b2.setkey("table",table) # get the full cube statistics, it depends if robust was pre-selected if nrargs == 0: mean0 = imstat0["mean"][0] sigma0 = imstat0["medabsdevmed"][0]*1.4826 peak0 = imstat0["max"][0] b2.setkey("mean" , float(mean0)) b2.setkey("sigma", float(sigma0)) b2.setkey("minval",float(imstat0["min"][0])) b2.setkey("maxval",float(imstat0["max"][0])) b2.setkey("minpos",imstat0["minpos"][:3].tolist()) #? [] or array(..dtype=int32) ?? b2.setkey("maxpos",imstat0["maxpos"][:3].tolist()) #? [] or array(..dtype=int32) ?? logging.info("CubeMax: %f @ %s" % (imstat0["max"][0],str(imstat0["maxpos"]))) logging.info("CubeMin: %f @ %s" % (imstat0["min"][0],str(imstat0["minpos"]))) logging.info("CubeRMS: %f" % sigma0) else: mean0 = imstat0["mean"][0] sigma0 = imstat0["rms"][0] peak0 = imstat10["max"][0] b2.setkey("mean" , float(mean0)) b2.setkey("sigma", float(sigma0)) b2.setkey("minval",float(imstat10["min"][0])) b2.setkey("maxval",float(imstat10["max"][0])) b2.setkey("minpos",imstat10["minpos"][:3].tolist()) #? [] or array(..dtype=int32) ?? b2.setkey("maxpos",imstat10["maxpos"][:3].tolist()) #? [] or array(..dtype=int32) ?? logging.info("CubeMax: %f @ %s" % (imstat10["max"][0],str(imstat10["maxpos"]))) logging.info("CubeMin: %f @ %s" % (imstat10["min"][0],str(imstat10["minpos"]))) logging.info("CubeRMS: %f" % sigma0) b2.setkey("robust",robust) rms_ratio = imstat0["rms"][0]/sigma0 logging.info("RMS Sanity check %f" % rms_ratio) if rms_ratio > 1.5: logging.warning("RMS sanity check = %f. Either bad sidelobes, lotsa signal, or both" % rms_ratio) logging.regression("CST: %f %f" % (sigma0, rms_ratio)) # plots: no plots need to be made when nchan=1 for continuum # however we could make a histogram, overlaying the "best" gauss so # signal deviations are clear? logging.info('mean,rms,S/N=%f %f %f' % (mean0,sigma0,peak0/sigma0)) if nchan == 1: # for a continuum/1-channel we only need to stuff some numbers into the _summary self._summary["chanrms"] = SummaryEntry([float(sigma0), fin], "CubeStats_AT", self.id(True)) self._summary["dynrange"] = SummaryEntry([float(peak0)/float(sigma0), fin], "CubeStats_AT", self.id(True)) self._summary["datamean"] = SummaryEntry([float(mean0), fin], "CubeStats_AT", self.id(True)) else: y1 = np.log10(ma.masked_invalid(peakval)) y2 = np.log10(ma.masked_invalid(sigma)) y3 = y1-y2 y4 = np.log10(ma.masked_invalid(-minval)) y5 = y1-y4 y = [y1,y2,y3,y4] title = 'CubeStats: ' + bdp_name+'_0' xlab = 'Channel' ylab = 'log(Peak,Noise,Peak/Noise)' labels = ['log(peak)','log(rms noise)','log(peak/noise)','log(|minval|)'] myplot = APlot(ptype=self._plot_type,pmode=self._plot_mode,abspath=self.dir()) segp = [[chans[0],chans[nchan-1],math.log10(sigma0),math.log10(sigma0)]] myplot.plotter(chans,y,title,bdp_name+"_0",xlab=xlab,ylab=ylab,segments=segp,labels=labels,thumbnail=True) imfile = myplot.getFigure(figno=myplot.figno,relative=True) thumbfile = myplot.getThumbnail(figno=myplot.figno,relative=True) image0 = Image(images={bt.PNG:imfile},thumbnail=thumbfile,thumbnailtype=bt.PNG,description="CubeStats_0") b2.addimage(image0,"im0") if use_ppp: # new trial for Lee title = 'PeakSum: (numsigma=%.1f)' % (numsigma) ylab = 'Jy*N_ppb' myplot.plotter(chans,[peaksum],title,bdp_name+"_00",xlab=xlab,ylab=ylab,thumbnail=False) if True: # hack ascii table y30 = np.where(sigma > 0, np.log10(peakval/sigma), 0.0) table2 = Table(columns=["freq","log(P/N)"],data=np.column_stack((freqs,y30))) table2.exportTable(self.dir("testCubeStats.tab")) del table2 # the "box" for the "spectrum" is all pixels. Don't know how to # get this except via shape. ia.open(self.dir(fin)) s = ia.summary() ia.close() if 'shape' in s: specbox = (0,0,s['shape'][0],s['shape'][1]) else: specbox = () caption = "Emission characteristics as a function of channel, as derived by CubeStats_AT " caption += "(cyan: global rms," caption += " green: noise per channel," caption += " blue: peak value per channel," caption += " red: peak/noise per channel)." self._summary["spectra"] = SummaryEntry([0, 0, str(specbox), 'Channel', imfile, thumbfile , caption, fin], "CubeStats_AT", self.id(True)) self._summary["chanrms"] = SummaryEntry([float(sigma0), fin], "CubeStats_AT", self.id(True)) # @todo Will imstat["max"][0] always be equal to s['datamax']? If not, why not? if 'datamax' in s: self._summary["dynrange"] = SummaryEntry([float(s['datamax']/sigma0), fin], "CubeStats_AT", self.id(True)) else: self._summary["dynrange"] = SummaryEntry([float(imstat0["max"][0]/sigma0), fin], "CubeStats_AT", self.id(True)) self._summary["datamean"] = SummaryEntry([imstat0["mean"][0], fin], "CubeStats_AT", self.id(True)) title = bdp_name + "_1" xlab = 'log(Peak,Noise,P/N)' myplot.histogram([y1,y2,y3],title,bdp_name+"_1",xlab=xlab,thumbnail=True) imfile = myplot.getFigure(figno=myplot.figno,relative=True) thumbfile = myplot.getThumbnail(figno=myplot.figno,relative=True) image1 = Image(images={bt.PNG:imfile},thumbnail=thumbfile,thumbnailtype=bt.PNG,description="CubeStats_1") b2.addimage(image1,"im1") # note that the 'y2' can have been clipped, which can throw off stats.robust() # @todo should set a mask for those. title = bdp_name + "_2" xlab = 'log(Noise))' n = len(y2) ry2 = stats.robust(y2) y2_mean = ry2.mean() y2_std = ry2.std() if n>9: logging.debug("NORMALTEST2: %s" % str(scipy.stats.normaltest(ry2))) myplot.hisplot(y2,title,bdp_name+"_2",xlab=xlab,gauss=[y2_mean,y2_std],thumbnail=True) title = bdp_name + "_3" xlab = 'log(diff[Noise])' n = len(y2) # dy2 = y2[0:-2] - y2[1:-1] dy2 = ma.masked_equal(y2[0:-2] - y2[1:-1],0.0).compressed() rdy2 = stats.robust(dy2) dy2_mean = rdy2.mean() dy2_std = rdy2.std() if n>9: logging.debug("NORMALTEST3: %s" % str(scipy.stats.normaltest(rdy2))) myplot.hisplot(dy2,title,bdp_name+"_3",xlab=xlab,gauss=[dy2_mean,dy2_std],thumbnail=True) title = bdp_name + "_4" xlab = 'log(Signal/Noise))' n = len(y3) ry3 = stats.robust(y3) y3_mean = ry3.mean() y3_std = ry3.std() if n>9: logging.debug("NORMALTEST4: %s" % str(scipy.stats.normaltest(ry3))) myplot.hisplot(y3,title,bdp_name+"_4",xlab=xlab,gauss=[y3_mean,y3_std],thumbnail=True) title = bdp_name + "_5" xlab = 'log(diff[Signal/Noise)])' n = len(y3) dy3 = y3[0:-2] - y3[1:-1] rdy3 = stats.robust(dy3) dy3_mean = rdy3.mean() dy3_std = rdy3.std() if n>9: logging.debug("NORMALTEST5: %s" % str(scipy.stats.normaltest(rdy3))) myplot.hisplot(dy3,title,bdp_name+"_5",xlab=xlab,gauss=[dy3_mean,dy3_std],thumbnail=True) title = bdp_name + "_6" xlab = 'log(Peak+Min)' n = len(y1) ry5 = stats.robust(y5) y5_mean = ry5.mean() y5_std = ry5.std() if n>9: logging.debug("NORMALTEST6: %s" % str(scipy.stats.normaltest(ry5))) myplot.hisplot(y5,title,bdp_name+"_6",xlab=xlab,gauss=[y5_mean,y5_std],thumbnail=True) logging.debug("LogPeak: m,s= %f %f min/max %f %f" % (y1.mean(),y1.std(),y1.min(),y1.max())) logging.debug("LogNoise: m,s= %f %f %f %f min/max %f %f" % (y2.mean(),y2.std(),y2_mean,y2_std,y2.min(),y2.max())) logging.debug("LogDeltaNoise: RMS/sqrt(2)= %f %f " % (dy2.std()/math.sqrt(2),dy2_std/math.sqrt(2))) logging.debug("LogDeltaP/N: RMS/sqrt(2)= %f %f" % (dy3.std()/math.sqrt(2),dy3_std/math.sqrt(2))) logging.debug("LogPeak+Min: robust m,s= %f %f" % (y5_mean,y5_std)) # compute two ratios that should both be near 1.0 if noise is 'normal' ratio = y2.std()/(dy2.std()/math.sqrt(2)) ratio2 = y2_std/(dy2_std/math.sqrt(2)) logging.info("RMS BAD VARIATION RATIO: %f %f" % (ratio,ratio2)) # making PPP plot if nchan > 1 and use_ppp: smax = 10 gamma = 0.75 z0 = peakval/peakval.max() # point sizes s = np.pi * ( smax * (z0**gamma) )**2 cmds = ["grid", "axis equal"] title = "Peak Points per channel" pppimage = bdp_name + '_ppp' myplot.scatter(xpos,ypos,title=title,figname=pppimage,size=s,color=chans,cmds=cmds,thumbnail=True) pppimage = myplot.getFigure(figno=myplot.figno,relative=True) pppthumbnail = myplot.getThumbnail(figno=myplot.figno,relative=True) caption = "Peak point plot: Locations of per-channel peaks in the image cube " + fin self._summary["peakpnt"] = SummaryEntry([pppimage, pppthumbnail, caption, fin], "CubeStats_AT", self.id(True)) dt.tag("plotting") # making PeakStats plot if nchan > 1 and psample > 0: logging.info("Computing peakstats") # grab peak,mean and width values for all peaks (pval,mval,wval) = peakstats(self.dir(fin),freqs,sigma0,pnumsigma,minchan,maxgap,psample,peakfit) title = "PeakStats: cutoff = %g" % (sigma0*pnumsigma) xlab = 'Peak value' ylab = 'FWHM (channels)' pppimage = bdp_name + '_peakstats' cval = mval myplot.scatter(pval,wval,title=title,xlab=xlab,ylab=ylab,color=cval,figname=pppimage,thumbnail=False) dt.tag("peakstats") # myplot.final() # pjt debug # all done! dt.tag("done") taskargs = "robust=" + sumrargs if use_ppp: taskargs = taskargs + " ppp=True" else: taskargs = taskargs + " ppp=False" for v in self._summary: self._summary[v].setTaskArgs(taskargs) dt.tag("summary") dt.end()
def find(self): """ Method that does the segment finding Parameters ---------- None Returns ------- Tuple containing a list of the segments, the cutoff used, the noise level, and a mean baseline. """ if self.abs: self.spec = abs(self.spec) temp = np.zeros(len(self.spec)) #self.see = ma.masked_array(temp, mask=self.spec.mask) # parameters (some now from the function argument) logging.debug("MIN/MAX " + str(self.spec.min()) + " / " +\ str(self.spec.max())) n = len(self.spec) # data and freq assumed to be same size if self.hanning: h = np.hanning(5) / 2 # normalize the convolution array h = np.array([0.25, 0.5, 0.25]) data2 = np.convolve(self.spec, h, 'same') else: data2 = self.spec if len(data2) != len(self.freq): raise Exception("ulines: data2 and freq not same array") # do the work dr = stats.robust(data2, self.f) noise = dr.std() logging.debug("ROBUST: (mean/median/noise) " + \ str(dr.mean()) + " / " + str(ma.median(dr)) + " / " + str(noise)) #print "\n\nD2",data2,"\n" data3 = ma.masked_invalid(data2) #print "\n\nD3\n",data3,"\n" ddiff = data3[1:n] - data3[0:n-1] logging.debug("DIFF: (mean/stdev) " + str(ddiff.mean()) +\ " / " + str(ddiff.std())) #print "\n\n",ddiff,"\n",self.f,"\n" ddr = stats.robust(ddiff, self.f) logging.debug("RDIFF: (mean/median/stdev) " + \ str(ddr.mean()) + " / " + str(ma.median(ddr)) + " / " + \ str(ddr.std())) #plt.show() if self.bottom: # first remind the classic dmean1 = dr.mean() dstd1 = dr.std() logging.debug("CLASSIC MEAN/SIGMA: " + str(dmean1) + \ " / " + str(dstd1)) # see if we can find a better one? # k should really depend on nchan, (like an nsigma), 2-3 should be ok for most. k = 2.5 dmin = dr.min() dmax = dr.min() + 2 * k * ddr.std() / 1.414214 logging.debug("DMIN/DMAX: " + str(dmin) + " / " + \ str(dmax)) dm = ma.masked_outside(dr, dmin, dmax) dmean = max(0.0, dm.mean()) # ensure that the mean is positive or 0.0 dstd = dm.std() if self.noise is not None: cutoff = self.pmin * self.noise elif self.nomean: cutoff = self.pmin * dstd else: cutoff = dmean + self.pmin * dstd logging.debug("BETTER MEAN/SIGMA: " + str(dmean) + \ " / " + str(dstd)) else: # classic simple, but fails when robust didn't kill off (enough of) the signal # sigma will be too high, cutoff too high and you could have no lines if there # is one strong lines dmean = dr.mean() dstd = dr.std() if self.noise is not None: cutoff = self.pmin * self.noise elif self.nomean: cutoff = self.pmin * dstd else: cutoff = dmean + self.pmin * dstd logging.debug("CLASSIC MEAN/SIGMA: " + str(dmean) + \ " / " + str(dstd)) logging.debug("SEGMENTS: f=%g pmin=%g maxgap=%d minchan=%d" % \ (self.f, self.pmin, self.maxgap, self.minchan)) #print "\nDATA\n\n",data2,"\n\n" segments = self.line_segments(data2, cutoff) #print "SEGMENTS",segments nlines = len(segments) logging.debug("Found %d segments above cutoff %f" % \ (nlines, cutoff)) segp = [] rmax = data2.max() + 0.1 # + 0.05*(data2.max()-data2.min()) segp.append([self.freq[0], self.freq[n - 1], cutoff, cutoff]) segp.append([self.freq[0], self.freq[n - 1], dmean, dmean]) for (l, s) in zip(range(nlines), segments): ch0 = s[0] ch1 = s[1] sum0 = sum(data2[ch0:ch1+1]) sum1 = sum(self.freq[ch0:ch1+1] * data2[ch0:ch1+1]) sum2 = sum(self.freq[ch0:ch1+1] * self.freq[ch0:ch1+1] * data2[ch0:ch1+1]) lmean = sum1 / sum0 # this fails for weaker lines, so wrapped it in a abs lsigma = math.sqrt(abs(sum2 / sum0 - lmean * lmean)) lmax = max(data2[ch0:ch1+1]) if self.peak != None: lpeak = 1000*max(self.peak[ch0:ch1+1]) else: lpeak = max(self.spec[ch0:ch1+1]) # @todo if we ever allow minchan=1 lsigma would be 0.0.... should we adopt channel width? lfwhm = 2.355 * lsigma / lmean * utils.c logging.debug( "Line in %2d channels %4d - %4d @ %.4f GHz +/- %.4f GHz log(S/N) = %.2f FWHM %5.1f km/s %.2f" % \ (ch1 - ch0 + 1, ch0, ch1, lmean, lsigma, lmax, lfwhm, lpeak)) segp.append([self.freq[ch0], self.freq[ch1], rmax, rmax]) segp.append([lmean, lmean, rmax - 0.1, rmax + 0.05]) return Segments.Segments(segments, nchan=len(self.spec)), cutoff, dstd, dmean
def find(self): """ Method that does the segment finding Parameters ---------- None Returns ------- Tuple containing a list of the segments, the cutoff used, the noise level, and a mean baseline. """ if self.abs: self.spec = abs(self.spec) temp = np.zeros(len(self.spec)) #self.see = ma.masked_array(temp, mask=self.spec.mask) # parameters (some now from the function argument) logging.debug("MIN/MAX " + str(self.spec.min()) + " / " +\ str(self.spec.max())) n = len(self.spec) # data and freq assumed to be same size if self.hanning: h = np.hanning(5) / 2 # normalize the convolution array h = np.array([0.25, 0.5, 0.25]) data2 = np.convolve(self.spec, h, 'same') else: data2 = self.spec if len(data2) != len(self.freq): raise Exception("ulines: data2 and freq not same array") # do the work dr = stats.robust(data2, self.f) noise = dr.std() logging.debug("ROBUST: (mean/median/noise) " + \ str(dr.mean()) + " / " + str(ma.median(dr)) + " / " + str(noise)) #print "\n\nD2",data2,"\n" data3 = ma.masked_invalid(data2) #print "\n\nD3\n",data3,"\n" ddiff = data3[1:n] - data3[0:n - 1] logging.debug("DIFF: (mean/stdev) " + str(ddiff.mean()) +\ " / " + str(ddiff.std())) #print "\n\n",ddiff,"\n",self.f,"\n" ddr = stats.robust(ddiff, self.f) logging.debug("RDIFF: (mean/median/stdev) " + \ str(ddr.mean()) + " / " + str(ma.median(ddr)) + " / " + \ str(ddr.std())) #plt.show() if self.bottom: # first remind the classic dmean1 = dr.mean() dstd1 = dr.std() logging.debug("CLASSIC MEAN/SIGMA: " + str(dmean1) + \ " / " + str(dstd1)) # see if we can find a better one? # k should really depend on nchan, (like an nsigma), 2-3 should be ok for most. k = 2.5 dmin = dr.min() dmax = dr.min() + 2 * k * ddr.std() / 1.414214 logging.debug("DMIN/DMAX: " + str(dmin) + " / " + \ str(dmax)) dm = ma.masked_outside(dr, dmin, dmax) dmean = max(0.0, dm.mean()) # ensure that the mean is positive or 0.0 dstd = dm.std() if self.noise is not None: cutoff = self.pmin * self.noise elif self.nomean: cutoff = self.pmin * dstd else: cutoff = dmean + self.pmin * dstd logging.debug("BETTER MEAN/SIGMA: " + str(dmean) + \ " / " + str(dstd)) else: # classic simple, but fails when robust didn't kill off (enough of) the signal # sigma will be too high, cutoff too high and you could have no lines if there # is one strong lines dmean = dr.mean() dstd = dr.std() if self.noise is not None: cutoff = self.pmin * self.noise elif self.nomean: cutoff = self.pmin * dstd else: cutoff = dmean + self.pmin * dstd logging.debug("CLASSIC MEAN/SIGMA: " + str(dmean) + \ " / " + str(dstd)) logging.debug("SEGMENTS: f=%g pmin=%g maxgap=%d minchan=%d" % \ (self.f, self.pmin, self.maxgap, self.minchan)) #print "\nDATA\n\n",data2,"\n\n" segments = self.line_segments(data2, cutoff) #print "SEGMENTS",segments nlines = len(segments) logging.debug("Found %d segments above cutoff %f" % \ (nlines, cutoff)) segp = [] rmax = data2.max() + 0.1 # + 0.05*(data2.max()-data2.min()) segp.append([self.freq[0], self.freq[n - 1], cutoff, cutoff]) segp.append([self.freq[0], self.freq[n - 1], dmean, dmean]) for (l, s) in zip(range(nlines), segments): ch0 = s[0] ch1 = s[1] sum0 = sum(data2[ch0:ch1 + 1]) sum1 = sum(self.freq[ch0:ch1 + 1] * data2[ch0:ch1 + 1]) sum2 = sum(self.freq[ch0:ch1 + 1] * self.freq[ch0:ch1 + 1] * data2[ch0:ch1 + 1]) lmean = sum1 / sum0 # this fails for weaker lines, so wrapped it in a abs lsigma = math.sqrt(abs(sum2 / sum0 - lmean * lmean)) lmax = max(data2[ch0:ch1 + 1]) if self.peak != None: lpeak = 1000 * max(self.peak[ch0:ch1 + 1]) else: lpeak = max(self.spec[ch0:ch1 + 1]) # @todo if we ever allow minchan=1 lsigma would be 0.0.... should we adopt channel width? lfwhm = 2.355 * lsigma / lmean * utils.c logging.debug( "Line in %2d channels %4d - %4d @ %.4f GHz +/- %.4f GHz log(S/N) = %.2f FWHM %5.1f km/s %.2f" % \ (ch1 - ch0 + 1, ch0, ch1, lmean, lsigma, lmax, lfwhm, lpeak)) segp.append([self.freq[ch0], self.freq[ch1], rmax, rmax]) segp.append([lmean, lmean, rmax - 0.1, rmax + 0.05]) return Segments.Segments(segments, nchan=len(self.spec)), cutoff, dstd, dmean