def arraystat_2(table, sid, season=0, rob=True, per=True, flags=0):
    """
    Calculates a complicated number of parameters for a given star.

    Parameters
    ----------
    table : atpy.Table
        Table with time-series photometry.
    sid : int
        WFCAM source ID.
    season : int, optional
        Which season to select (1, 2, 3, or other=All).
    rob : bool, optional
        Also use robust statistics? (takes longer, default True)
    per : bool, optional
        Run period-finding? (takes longer, default True)
    flags : int, optional
        Maximum ppErrBit quality flags to use (default 0)

    Returns
    -------
    ret : data structure or None
        Object carrying the computed values as attributes, or None (after
        printing a message) when the cut table has no rows for `sid`.
        Top-level attributes: N, RA, DEC, chip, one_chip, Stetson,
        color_slope, jpp_max, hpp_max, kpp_max.
        Per-band sub-objects j, h, k, jmh, hmk each carry: data, err,
        rchi2, mean, rms, min, max, peak_trough, mean_err; if `rob`, also
        datar, meanr, rmsr, minr, maxr, peak_troughr; if `per`, also
        lsp, lsp_per, lsp_pow, fx2_per.

    """

    s_table = data_cut(table, [sid], season=season, flags=flags)

    if len(s_table) < 1:
        print("no data for %d!" % sid)
        return None

    jcol = s_table.JAPERMAG3
    jerr = s_table.JAPERMAG3ERR
    hcol = s_table.HAPERMAG3
    herr = s_table.HAPERMAG3ERR
    kcol = s_table.KAPERMAG3
    kerr = s_table.KAPERMAG3ERR
    jmhcol = s_table.JMHPNT
    jmherr = s_table.JMHPNTERR
    hmkcol = s_table.HMKPNT
    hmkerr = s_table.HMKPNTERR

    racol = s_table.RA
    decol = s_table.DEC

    date = s_table.MEANMJDOBS

    # A second cut made without the `flags` argument; only used for the
    # per-band ppErrBits maxima at the end.
    messy_table = data_cut(table, [sid], season=-1)
    jppcol = messy_table.JPPERRBITS
    hppcol = messy_table.HPPERRBITS
    kppcol = messy_table.KPPERRBITS

    # make an empty data structure and just assign it information, then
    # return the object itself; then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()

    ret.N = len(s_table)
    ret.RA = racol.mean()
    ret.DEC = decol.mean()

    ret.chip = get_chip(date[0], np.degrees(racol[0]), np.degrees(decol[0]))

    if ret.N > 4:
        # Compare the chip of epochs 1..3 against epoch 0 (ret.chip).
        # Every call converts RA/DEC with np.degrees, exactly as the
        # ret.chip call above does, so all four chips are computed from
        # the same units.
        # NOTE(review): only the first four epochs are inspected, yet the
        # guard requires more than four rows -- confirm whether >= 4 was
        # intended.
        ret.one_chip = all(
            get_chip(date[i], np.degrees(racol[i]), np.degrees(decol[i]))
            == ret.chip
            for i in range(1, 4))
    else:
        ret.one_chip = True

    ret.Stetson = stetson.S(jcol, jerr, hcol, herr, kcol, kerr)

    # Parallel per-band structures, so the statistics can be looped over.
    ret.j = Empty()
    ret.j.data = jcol
    ret.j.err = jerr

    ret.h = Empty()
    ret.h.data = hcol
    ret.h.err = herr

    ret.k = Empty()
    ret.k.data = kcol
    ret.k.err = kerr

    ret.jmh = Empty()
    ret.jmh.data = jmhcol
    ret.jmh.err = jmherr

    ret.hmk = Empty()
    ret.hmk.data = hmkcol
    ret.hmk.err = hmkerr

    bands = [ret.j, ret.h, ret.k, ret.jmh, ret.hmk]

    for b in bands:
        # use b.data, b.err

        b.rchi2 = reduced_chisq(b.data, b.err)

        b.mean = b.data.mean()
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.peak_trough = b.max - b.min

        b.mean_err = b.err.mean()

        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            b.datar = rb.removeoutliers(b.data, 3, niter=2)

            b.meanr = rb.meanr(b.data)
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.peak_troughr = b.maxr - b.minr

        # Period finding... is a little dodgy still, and might take forever
        if per:
            b.lsp = lsp(date, b.data, 6., 6.)
            # index of the selected periodogram peak; lsp[0] is inverted
            # to give a period and lsp[1] is read as the power there
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            b.lsp_per = 1. / b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.fx2_per = 1. / test_analyze(date, b.data, b.err)

    # Finally we'll want to do the whole slope, distance on the JMH graph
    # (until I get the fitting done, we'll have to use hmk and jmh naively)
    ret.color_slope = (ret.jmh.peak_trough / ret.hmk.peak_trough)

    # and the pp_max, using the messy table
    ret.jpp_max = jppcol.max()
    ret.hpp_max = hppcol.max()
    ret.kpp_max = kppcol.max()

    return ret
def remove_nights2(table, analysis):
    '''
    Removes nights from a table based on the output of analyze_nights2.

    Must be called after analyze_nights2.

    Parameters
    ----------
    table : atpy.Table
        a data table of time-series photometry from the WFCAM Science
        Archive (WSA). Must include all magnitudes, colors, color errors,
        ppErrBits, and MeanMjdObs information.
    analysis : tuple
        The output of analyze_nights2. It is unpacked as three parallel
        sequences (nights, hmk, jmh) that are iterated together.

    Returns
    -------
    newtable : atpy.Table
        The same data table, with all data from bad nights removed
        (the result of `table.where` on the keep-mask).
    badnights : list
        The list of timestamps that were rejected.
    cuts : list of tuples
        The parameters used for rejecting nights, one tuple per entry of
        `analysis`. Order of each tuple:
        (middle_hmk, middle_jmh, spread_hmk, spread_jmh),
        i.e. the robust means and robust standard deviations.

    '''

    # First, let's relabel analyze_nights2' output.
    # (this depends on what analyze_nights2 outputs)
    nights, hmk, jmh = analysis

    master_cloudy = []
    cuts = []

    for night_set, mean_hmk, mean_jmh in zip(nights, hmk, jmh):

        # Robust centre and spread of the nightly mean colors.
        middle_hmk = rb.meanr(mean_hmk)
        middle_jmh = rb.meanr(mean_jmh)

        spread_hmk = rb.stdr(mean_hmk)
        spread_jmh = rb.stdr(mean_jmh)

        # Distance of each night from the centre, in units of the robust
        # spread along each color axis; nights beyond 3 are rejected.
        ellipse = np.sqrt(((mean_hmk - middle_hmk) / spread_hmk) ** 2 +
                          ((mean_jmh - middle_jmh) / spread_jmh) ** 2)

        cloudy = night_set[ellipse > 3]

        # Diagnostic output. Written so it is valid on Python 2 and 3 and
        # emits the same text as `print "label: ", value` did.
        print("let's test this.")
        print("%s %s" % ("middle: ", middle_hmk))
        print("%s %s" % ("spread: ", spread_hmk))
        print("%s %s" % ("n_outliers: ", cloudy.size))

        # now let's save cloudy
        master_cloudy.extend(cloudy)
        cuts.append((middle_hmk, middle_jmh, spread_hmk, spread_jmh))

    # now let's remove the nights.
    # A set gives O(1) membership tests instead of scanning the list of
    # bad nights once per table row. dtype=bool keeps the mask boolean
    # even when the table has no rows (np.array([]) would be float64).
    cloudy_lookup = set(master_cloudy)
    keep = np.array(
        [night not in cloudy_lookup for night in table.MEANMJDOBS],
        dtype=bool)

    clean_data = table.where(keep)

    return clean_data, master_cloudy, cuts
def statcruncher(table, sid, season=0, rob=True, per=True,
                 graded=False, colorslope=False, flags=0):
    """
    Calculates several statistical properties for a given star.

    Will work with "lonely" datapoints (i.e. not all JHK mags are
    well-defined). Optionally works with graded data, too!

    Parameters
    ----------
    table : atpy.Table
        Table with time-series photometry
    sid : int
        13-digit WFCAM source ID of star to plot
    season : int, optional
        Which observing season of our dataset (1, 2, 3, or all).
        Any value that is not the integers (1, 2, or 3) will be
        treated as "no season", and no time-cut will be made.
        Note that this is the default behavior.
    rob : bool, optional
        Use robust statistics, in addition to normal ones?
        (takes longer, default True)
    per : bool, optional
        Run period-finding? Uses fast chi-squared and lomb-scargle.
        (takes longer, default True)
    graded : bool, optional
        Also calculate Stetson indices using quality grades as weights?
        Uses stetson_graded; requires that the data has been graded by
        night_cleanser.null_cleanser_grader().
    colorslope : bool, optional
        Calculate color slopes? Runs them over (JvJ-H, KvH-K, J-HvH-K).
        Make sure your data has been color-error-corrected! Default False.
    flags : int, optional
        Maximum ppErrBit quality flags to use (default 0)

    Returns
    -------
    ret : data structure or None
        Contains the computed values. They can be accessed as attributes
        (e.g., "ret.j.mean" or "ret.Stetson"). Per-band results live on
        the sub-objects ret.j, ret.h, ret.k, ret.jmh and ret.hmk; a band
        with no rows only carries data, err, date and N.
        None is returned (after printing a message) when the cut table
        has no rows for `sid`.

    """

    s_table = data_cut(table, sid, season=season)

    if len(s_table) < 1:
        print("no data for %d!" % sid)
        return None

    # First, let's compute single-band statistics. This will require
    # separate data_cuts on each band.

    # Cuts made without max_flag: used only for the flag census below.
    full_jtable = band_cut(s_table, 'j')
    full_htable = band_cut(s_table, 'h')
    full_ktable = band_cut(s_table, 'k')

    j_table = band_cut(s_table, 'j', max_flag=flags)
    h_table = band_cut(s_table, 'h', max_flag=flags)
    k_table = band_cut(s_table, 'k', max_flag=flags)

    # Color tables: a band cut applied on top of another band's cut.
    jmh_table = band_cut(j_table, 'h', max_flag=flags)
    hmk_table = band_cut(h_table, 'k', max_flag=flags)

    # get the RA and DEC columns, checking for sensible values
    racol = s_table.RA[(s_table.RA > 0) & (s_table.RA < 7)]
    decol = s_table.DEC[(s_table.DEC > -4) & (s_table.DEC < 4)]

    # make an empty data structure and just assign it information, then
    # return the object itself! then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()

    # How many nights have observations in each band?
    ret.N_j = len(j_table)
    ret.N_h = len(h_table)
    ret.N_k = len(k_table)

    # What's the distribution of flags and nights?
    js = full_jtable.JPPERRBITS
    hs = full_htable.HPPERRBITS
    ks = full_ktable.KPPERRBITS

    ret.N_j_noflag = len(js[js == 0])
    ret.N_h_noflag = len(hs[hs == 0])
    ret.N_k_noflag = len(ks[ks == 0])

    ret.N_j_info = len(js[(js < 256) & (js > 0)])
    ret.N_h_info = len(hs[(hs < 256) & (hs > 0)])
    ret.N_k_info = len(ks[(ks < 256) & (ks > 0)])

    ret.N_j_warn = len(js[js >= 256])
    ret.N_h_warn = len(hs[hs >= 256])
    ret.N_k_warn = len(ks[ks >= 256])

    # Mean position of this source
    ret.RA = racol.mean()
    ret.DEC = decol.mean()

    # Calculate the Stetson index...
    S, choice, stetson_nights = Stetson_machine(s_table, flags)

    ret.Stetson = S
    ret.Stetson_choice = choice
    ret.Stetson_N = stetson_nights

    if graded:
        # Calculate the graded Stetson index...
        g_S, g_choice, g_stetson_nights = (
            graded_Stetson_machine(s_table, flags))

        ret.graded_Stetson = g_S
        ret.graded_Stetson_choice = g_choice
        ret.graded_Stetson_N = g_stetson_nights

    # Calculate PSTAR parameters
    ret.pstar_mean = s_table.PSTAR.mean()
    ret.pstar_median = np.median(s_table.PSTAR)
    ret.pstar_rms = s_table.PSTAR.std()

    # Create parallel data structures for each band, so we can iterate.
    # Each one holds its own magnitude (or color), error, date axis and
    # number of rows, taken from that band's own cut table.
    ret.j = Empty()
    ret.j.data = j_table.JAPERMAG3
    ret.j.err = j_table.JAPERMAG3ERR
    ret.j.date = j_table.MEANMJDOBS
    ret.j.N = ret.N_j

    ret.h = Empty()
    ret.h.data = h_table.HAPERMAG3
    ret.h.err = h_table.HAPERMAG3ERR
    ret.h.date = h_table.MEANMJDOBS
    ret.h.N = ret.N_h

    ret.k = Empty()
    ret.k.data = k_table.KAPERMAG3
    ret.k.err = k_table.KAPERMAG3ERR
    ret.k.date = k_table.MEANMJDOBS
    ret.k.N = ret.N_k

    ret.jmh = Empty()
    ret.jmh.data = jmh_table.JMHPNT
    ret.jmh.err = jmh_table.JMHPNTERR
    ret.jmh.date = jmh_table.MEANMJDOBS
    ret.jmh.N = len(jmh_table)

    ret.hmk = Empty()
    ret.hmk.data = hmk_table.HMKPNT
    ret.hmk.err = hmk_table.HMKPNTERR
    ret.hmk.date = hmk_table.MEANMJDOBS
    ret.hmk.N = len(hmk_table)

    bands = [ret.j, ret.h, ret.k, ret.jmh, ret.hmk]

    for b in bands:
        # use b.data, b.err

        # if this band is empty, don't try to do the following assignments
        if b.N == 0:
            continue

        b.rchi2 = reduced_chisq(b.data, b.err)

        b.mean = b.data.mean()
        b.median = np.median(b.data)
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.range = b.max - b.min

        b.err_mean = b.err.mean()
        b.err_median = np.median(b.err)
        b.err_rms = b.err.std()
        b.err_min = b.err.min()
        b.err_max = b.err.max()
        b.err_range = b.err_max - b.err_min

        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            # retind=True also hands back the index selection, which is
            # applied to the errors so that errr lines up with datar.
            b.datar, b.indr = rb.removeoutliers(b.data, 3, niter=2,
                                                retind=True)
            b.errr = b.err[b.indr]

            b.meanr = rb.meanr(b.data)
            b.medianr = rb.medianr(b.data)
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.ranger = b.maxr - b.minr

            b.err_meanr = b.errr.mean()
            b.err_medianr = np.median(b.errr)
            b.err_rmsr = b.errr.std()
            b.err_minr = b.errr.min()
            b.err_maxr = b.errr.max()
            b.err_ranger = b.err_maxr - b.err_minr

        # Period finding... is a little dodgy still, and might take forever.
        # Skipped for bands with two or fewer points.
        if per and b.N > 2:

            hifac = lsp_tuning(b.date)
            b.lsp = lsp(b.date, b.data, 6., hifac)
            # index of the selected periodogram peak
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            b.lsp_per = 1. / b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.lsp_sig = getSignificance(b.lsp[0], b.lsp[1],
                                        b.lsp[2], 6.)[Jmax]

            best_freq, chimin = test_analyze(b.date, b.data, b.err,
                                             ret_chimin=True)

            b.fx2_per, b.fx2_chimin = 1. / best_freq, chimin

    if colorslope:
        # The middle value returned by slope() is not stored.

        # J vs J-H : use jmh_table exclusively
        (ret.jjh_slope, _, ret.jjh_slope_err) = (
            slope(jmh_table.JMHPNT, jmh_table.JAPERMAG3,
                  jmh_table.JMHPNTERR, jmh_table.JAPERMAG3ERR,
                  verbose=False))

        # K vs H-K : use hmk_table exclusively
        (ret.khk_slope, _, ret.khk_slope_err) = (
            slope(hmk_table.HMKPNT, hmk_table.KAPERMAG3,
                  hmk_table.HMKPNTERR, hmk_table.KAPERMAG3ERR,
                  verbose=False))

        # J-H vs H-K : use jhk_table exclusively.
        # Built here because nothing outside this branch reads it.
        jhk_table = band_cut(jmh_table, 'k', max_flag=flags)
        (ret.jhk_slope, _, ret.jhk_slope_err) = (
            slope(jhk_table.HMKPNT, jhk_table.JMHPNT,
                  jhk_table.HMKPNTERR, jhk_table.JMHPNTERR,
                  verbose=False))

    # (per-band pp_max is slated for a re-implementation)

    return ret