def check_calibration(segs, times, timeseries, FAPthrs):
    """
    checks the pipeline's calibration at each "FAPthr in FAPthrs"

    this may be sped up with a call to timeseries_to_livetime() instead of
    idq.timeseries_to_segments() -> event.livetime; however, we currently use
    some segment logic and it is not clear that we can avoid actually
    generating segments
    """
    idq_livetime = event.livetime(segs)

    segments = []
    deadtimes = []
    statedFAPs = []
    errs = []
    for FAPthr in FAPthrs:
        SEGS = []
        max_statedFAP = 0.0
        for (t, ts) in zip(times, timeseries):
            (_segs, _min_ts) = idq.timeseries_to_segments(t, -ts, -FAPthr) ### we want FAP <= FAPthr <--> -FAP >= -FAPthr
            SEGS += list(_segs) ### ensure this is a list in case the behavior of timeseries_to_segments changes...
            if _min_ts is not None:
                statedFAP = -_min_ts
                if max_statedFAP < statedFAP:
                    max_statedFAP = statedFAP

        SEGS = event.andsegments([SEGS, segs])
        segments.append(SEGS)

        SEGS_livetime = event.livetime(SEGS)
        if not idq_livetime:
            if SEGS_livetime:
                raise ValueError("something is weird with segments... idq_livetime is zero but SEGS_livetime is not")
            else:
                deadtime = 0.0
        else:
            deadtime = 1.0 * SEGS_livetime / idq_livetime
            if deadtime > 1.0:
                raise ValueError("deadtime > 1.0, something is weird...\n  SEGS_livetime = %f\n  idq_livetime = %f"%(SEGS_livetime, idq_livetime))
        deadtimes.append(deadtime)

        statedFAPs.append(max_statedFAP)

        if max_statedFAP > 0:
            err = deadtime/max_statedFAP - 1
        elif deadtime:
            err = 1
        else:
            err = 0
        errs.append(err)

    return segments, deadtimes, statedFAPs, errs
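### a minimal usage sketch, not part of the pipeline: the segment list and
### thresholds below are hypothetical placeholders, and `times`/`timeseries`
### are assumed to come from something like idq_gdb_utils.combine_ts(fapfilenames)
def _check_calibration_example(times, timeseries):
    segs = [[1000000000, 1000000100]] ### analysis segments as [start, end] pairs (hypothetical)
    FAPthrs = [1e-1, 1e-2, 1e-3]      ### FAP thresholds to test (hypothetical)
    segments, deadtimes, statedFAPs, errs = check_calibration(segs, times, timeseries, FAPthrs)
    for FAPthr, deadtime, statedFAP, err in zip(FAPthrs, deadtimes, statedFAPs, errs):
        print "FAPthr=%.1e : deadtime=%.3e, statedFAP=%.3e, fractional error=%.3f"%(FAPthr, deadtime, statedFAP, err)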
for gps in args:
    print "gps : %.9f" % (gps)

    minwin = opts.window

    ### go find triggers
    if opts.verbose:
        print "\tdiscovering KW triggers within [%.9f, %.9f]" % (gps - opts.window, gps + opts.window)

    ### figure out which files you want
    filenames = []
    coverage = []
    for gdsdir in kwgdsdirs:
        for filename in idq.get_all_files_in_range(gdsdir, gps - opts.window, gps + opts.window, pad=0, suffix=".trg"):
            seg = idq.extract_start_stop(filename, suffix=".trg")
            if not event.livetime(event.andsegments([coverage, [seg]])): ### no overlap with files we already kept
                coverage = event.fixsegments(coverage + [seg])
                filenames.append(filename)

    ### figure out the extent of the coverage
    if len(event.include([[gps]], coverage, tcent=0)) == 0: ### gps is not covered by any trg file
        if opts.force:
            if opts.verbose:
                print "no triggers found for gps : %.3f" % (gps)
            continue
        else:
            raise ValueError("no triggers found for gps : %.3f" % (gps))

    for s, e in coverage:
        if s < gps:
            if gps - s < minwin:
                minwin = gps - s
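### a toy illustration, not part of the pipeline, of the event.* segment idiom
### used above; the numeric segments are hypothetical, and it assumes the
### behaviors these utilities exhibit throughout this code: andsegments()
### intersects lists of segment-lists, livetime() sums segment durations, and
### fixsegments() sorts and merges overlapping segments
def _coverage_idiom_example():
    coverage = [[0, 10], [20, 30]]
    seg = [8, 12]
    overlap = event.livetime(event.andsegments([coverage, [seg]])) ### 2 sec of overlap here
    if not overlap: ### only extend coverage with genuinely new segments
        coverage = event.fixsegments(coverage + [seg])
    return coverage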
def extract_ovl_vconfigs(rank_frames, channame, traindir, start, end, metric='eff/dt'):
    """
    returns a dictionary mapping active vconfigs to segments
    does NOT include "none" channel
    """
    vconfigs = []
    for rnkfr in rank_frames:
        trained, calib = idq.extract_timeseries_ranges(rnkfr)
        classifier = idq.extract_fap_name(rnkfr)

        vetolist = glob.glob("%s/%d_%d/ovl/ovl/*vetolist.eval"%(traindir, trained[0], trained[1]))
        if len(vetolist) != 1:
            raise ValueError("trouble finding a single vetolist file for : %s"%rnkfr)
        vetolist = vetolist[0]
        v = event.loadstringtable(vetolist)

        ### map each rank to the configurations active at that rank
        rankmap = { 0 : [(None, None, None, None, 0, 0)] }
        for line in v:
            metric_exp = float(line[ovl.vD['metric_exp']])
            if metric == 'eff/dt':
                rnk = ovl.effbydt_to_rank(metric_exp)
            elif metric == 'vsig':
                rnk = ovl.vsig_to_rank(metric_exp)
            elif metric == 'useP':
                rnk = ovl.useP_to_rank(metric_exp)
            else:
                raise ValueError("metric=%s not understood"%metric)

            config = (line[ovl.vD['vchan']], float(line[ovl.vD['vthr']]), float(line[ovl.vD['vwin']]), metric, metric_exp, rnk)
            if rankmap.has_key(rnk):
                rankmap[rnk].append(config)
            else:
                rankmap[rnk] = [config]

        for key, value in rankmap.items():
            rankmap[key] = tuple(value)

        ### pull out the rank timeseries and restrict to [start, end]
        t, ts = idq.combine_gwf([rnkfr], [channame])
        t = t[0]
        truth = (start <= t)*(t <= end)
        t = t[truth]
        ts = ts[0][truth]

        if not len(ts):
            continue

        configs = rankmap[ts[0]]
        segStart = t[0]
        for T, TS in zip(t, ts):
            if rankmap[TS] != configs: ### configuration changed -> close the current segment
                vconfigs.append((configs, [segStart, T]))
                segStart = T
                configs = rankmap[TS]
        vconfigs.append((configs, [segStart, T+t[1]-t[0]])) ### close the final segment, padding by the (assumed uniform) sample spacing

    configs = {}
    for vconfig, seg in vconfigs:
        if configs.has_key(vconfig):
            configs[vconfig].append(seg)
        else:
            configs[vconfig] = [seg]
    for key, value in configs.items():
        value = event.andsegments([event.fixsegments(value), [[start, end]]])
        if event.livetime(value):
            configs[key] = event.fixsegments(value)
        else:
            raise ValueError("somehow picked up a config with zero livetime...")

    return vconfigs, configs, {"vchan":0, "vthr":1, "vwin":2, "metric":3, "metric_exp":4, "rank":5}
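### a minimal usage sketch, not part of the pipeline; the filename, channel
### name, training directory, and gps times are all hypothetical placeholders
def _extract_ovl_vconfigs_example():
    rank_frames = ["H1_ovl_rank-1000000000-32.gwf"] ### hypothetical rank frame
    vconfigs, configs, colmap = extract_ovl_vconfigs(
        rank_frames,
        "H1:IDQ-OVL_RANK",    ### hypothetical channel name
        "/path/to/traindir",  ### hypothetical training directory
        1000000000,
        1000000032,
    )
    for vconfig, segs in configs.items():
        for config in vconfig: ### each config is indexed via the returned column map
            print config[colmap['vchan']], config[colmap['vthr']], config[colmap['vwin']]
        print "    active for %.1f sec"%event.livetime(segs)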
# update mappings via uroc files
#===============================================================================================

### find all *dat files, bin them according to classifier
### needed for opts.mode=="dat" and KDE estimates
logger.info('finding all *dat files')
datsD = defaultdict( list )
for dat in idq.get_all_files_in_range(realtimedir, gpsstart-lookback, gpsstart+stride, pad=0, suffix='.dat'):
    datsD[idq.extract_dat_name( dat )].append( dat )

### throw away any un-needed files
for key in datsD.keys():
    if key not in classifiers:
        datsD.pop(key)
    else: ### throw out files that don't contain any science time
        datsD[key] = [ dat for dat in datsD[key] if event.livetime(event.andsegments([idqsegs, [idq.extract_start_stop(dat, suffix='.dat')]])) ]

if opts.mode=="npy": ### need rank files
    ### find all *rank*npy.gz files, bin them according to classifier
    logger.info('finding all *rank*.npy.gz files')
    ranksD = defaultdict( list )
    for rank in [rank for rank in idq.get_all_files_in_range(realtimedir, gpsstart-lookback, gpsstart+stride, pad=0, suffix='.npy.gz') if "rank" in rank]:
        ranksD[idq.extract_fap_name( rank )].append( rank ) ### should just work...

    ### throw away files we will never need
    for key in ranksD.keys():
        if key not in classifiers: ### throw away unwanted files
            ranksD.pop(key)
        else: ### keep only files that overlap with scisegs
            ranksD[key] = [ rank for rank in ranksD[key] if event.livetime(event.andsegments([idqsegs, [idq.extract_start_stop(rank, suffix='.npy.gz')]])) ]
#---

### look up KW trg files that intersect segs
if opts.verbose:
    print( "finding relevant kw_trgfiles" )
kw_trgfiles = []
### iterate over different configurations used in training
for kwconf, dirname in eval(config.get('general', 'kw')).items(): ### this is kinda ugly...
    if opts.verbose:
        print( " searching for KW trgfiles corresponding to %s in %s within [%.3f, %.3f]"%(kwconf, dirname, segs[0][0], segs[-1][1]) )

    ### iterate over all trg files found in that directory
    for trgfile in idq.get_all_files_in_range(dirname, segs[0][0], segs[-1][1], pad=0, suffix='.trg'):
        ### check whether there is some overlap
        ### not guaranteed if there are gaps between min and max gps times
        if event.livetime(event.andsegments([[idq.extract_start_stop(trgfile, suffix='.trg')], segs])):
            if opts.verbose:
                print( " kept : "+trgfile )
            kw_trgfiles.append( trgfile )
        elif opts.verbose:
            print( " discarded : "+trgfile )

#---

if opts.verbose:
    print( "evaluating %d times using %d KW trgfiles"%(Ngps, len(kw_trgfiles)) )

### set up output pointers
if opts.output_filename:
    datfile = os.path.basename(opts.output_filename)
    output_dir = os.path.dirname(opts.output_filename)
wait = opts.gps + delay - int(idq.nowgps())
if wait > 0:
    print 'waiting %.1f seconds before processing' % wait
    time.sleep(wait)

#====================
# science segments
#====================
if opts.use_science_segments:
    print 'querying science segments'

    ### get DMT xml segments from disk
    ### logic about waiting and re-trying is built into retrieve_scisegs
    good, covered = idq.retrieve_scisegs(dmt_segments_location, dmtdq_name, gps_start, twopadding, pad=0, sleep=delay, nretry=1, logger=logger)

    if event.livetime(covered) < twopadding:
        raise Warning('unknown science coverage, skipping')

    elif event.livetime(good) < twopadding:
        raise Warning('incomplete science coverage, skipping')

    print 'complete science coverage'

else:
    print 'analyzing data regardless of science segments'

#====================
# samples -> just the supplied gps time
#====================
rank_channame  = idq.channame(ifo, opts.classifier, "%s_rank"%tag)
fap_channame   = idq.channame(ifo, opts.classifier, "%s_fap"%tag)
fapUL_channame = idq.channame(ifo, opts.classifier, "%s_fapUL"%tag)

flavor = config.get(opts.classifier, 'flavor')
if config.has_option(opts.classifier, 'plotting_label'):
    plotting_label = config.get(opts.classifier, 'plotting_label')
else:
    plotting_label = opts.classifier

#===================================================================================================

### Find all FAP files
if opts.verbose:
    print "finding all fap*gwf files"
faps = [fap for fap in idq.get_all_files_in_range(realtimedir, opts.start, opts.end, pad=0, suffix='.gwf')
            if ('fap' in fap)
            and (opts.classifier==idq.extract_fap_name(fap))
            and event.livetime(event.andsegments([[idq.extract_start_stop(fap, suffix=".gwf")], idqsegs]))
       ]

### compute total time covered
#T = event.livetime( [idq.extract_start_stop(fap, suffix='.gwf') for fap in faps] )*1.0
T = event.livetime( idqsegs )*1.0

### combine timeseries and generate segments
if opts.verbose:
    print "generating segments from %d fap files"%(len(faps))
segs = dict( (fapThr, [[], 1.0]) for fapThr in opts.FAPthr )
t, ts = idq.combine_gwf(faps, [fap_channame])
for t, ts in zip(t, ts):
    t, ts = idq.timeseries_in_segments( t, ts, idqsegs )
    for fapThr in opts.FAPthr:
def check_calibration(
    realtimedir,
    start,
    end,
    classifier,
    FAPthrs,
    verbose=False,
    ):
    """
    checks the pipeline's calibration at each "FAPthr in FAPthrs"
    """

    # =================================================
    # grab idq_segments so we compute meaningful livetimes
    # =================================================

    if verbose:
        print 'getting idq_segments'

    idq_segs = idq.get_idq_segments(realtimedir, start, end, suffix='.npy.gz')
    idq_livetime = event.livetime(idq_segs)

    # =================================================
    # grab relevant data
    # =================================================

    if verbose:
        print 'looking for *_fap_*.npy.gz files in', realtimedir

    fapfilenames = [filename for filename in idq.get_all_files_in_range(realtimedir, start, end, pad=0, suffix='.npy.gz')
                        if '_fap_' in filename and classifier in filename]

    if verbose:
        print 'discovered %d files' % len(fapfilenames)
        print 'building time-series'

    (times, timeseries) = idq_gdb_utils.combine_ts(fapfilenames)

    # =================================================
    # check calibration
    # =================================================

    segments = []
    deadtimes = []
    statedFAPs = []
    for FAPthr in FAPthrs:
        if verbose:
            print 'computing segments for FAPthr =', FAPthr

        segs = []
        max_statedFAP = None
        for (t, ts) in zip(times, timeseries):
            (_segs, _min_ts) = idq.timeseries_to_segments(t, -ts, -FAPthr) # we want FAP <= FAPthr <--> -FAP >= -FAPthr
            segs += _segs
            if _min_ts is not None:
                statedFAP = -_min_ts
                if max_statedFAP < statedFAP:
                    max_statedFAP = statedFAP

        segs = event.andsegments([segs, idq_segs])
        segments.append(segs)
        deadtimes.append(1.0 * event.livetime(segs) / idq_livetime)
        statedFAPs.append(max_statedFAP) ### report the largest stated FAP across all time-series

    return (idq_segs, segments, deadtimes, statedFAPs)
(opts, args) = parser.parse_args()

FAPthrs = [float(l) for l in opts.FAPthrs.split()]

(idq_segs, segments, deadtimes, statedFAPs) = check_calibration(
    opts.realtimedir,
    opts.start,
    opts.end,
    opts.classifier,
    FAPthrs,
    verbose=opts.verbose,
    )

# ## Report!

print 'idq_livetime = %.3f' % event.livetime(idq_segs)

report_str = \
    """  FAPthr     = %.5f
  stated FAP = %.5f
  deadtime   = %.5f
  %s difference = %.3f%s"""

for (FAPthr, deadtime, statedFAP) in zip(FAPthrs, deadtimes, statedFAPs):
    # ## Report!
    print report_str % (
        FAPthr,
f.close()

#========================
# go findeth the frame data
#========================
logger.info(' finding all *fap*.gwf files')
fapsD = defaultdict( list )
for fap in [fap for fap in idq.get_all_files_in_range(realtimedir, lookup_startgps, lookup_endgps, pad=0, suffix='.gwf') if "fap" in fap]:
    fapsD[idq.extract_fap_name( fap )].append( fap )

### throw away files we will never need
for key in fapsD.keys():
    if key not in opts.classifier: ### throw away unwanted files
        fapsD.pop(key)
    else: ### keep only files that overlap with scisegs
        fapsD[key] = [ fap for fap in fapsD[key] if event.livetime(event.andsegments([idqsegs, [idq.extract_start_stop(fap, suffix='.gwf')]])) ]

#========================
# iterate through classifiers -> generate segments
#========================

### set up xml document
from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils
from glue.ligolw import lsctables
from glue.ligolw.utils import process

xmldoc = ligolw.Document()
xml_element = ligolw.LIGO_LW()
xmldoc.appendChild( xml_element )
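### a minimal sketch (hypothetical program name and output path) of how a
### document built this way is typically finalized with glue: register the
### calling process, then serialize to disk; whether this script records a
### process row is an assumption, not something shown above
def _write_xmldoc_example(xmldoc, opts):
    process.register_to_xmldoc( xmldoc, 'laldetchar-idq-example', opts.__dict__ ) ### hypothetical program name
    ligolw_utils.write_filename( xmldoc, 'segments.xml.gz', gz=True ) ### hypothetical output path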
segfilename = "%s/%s1_%s-%d-%d.xml"%(opts.output_dir, ifo, opts.flag.replace(":","_"), start, end-start) cmd = "ligolw_segment_query_dqsegdb -t %s -q -a %s1:%s -s %d -e %d -o %s"%(opts.segdb_url, ifo, opts.flag, start, end, segfilename) if opts.verbose: print "querying %s segments for %s\n %s"%(opts.flag, ifo, cmd) output = sp.Popen( cmd.split(), stdout=sp.PIPE, stderr=sp.PIPE ).communicate() ### iterate over segments, computing PSDs xmldoc = ligolw_utils.load_filename(segfilename, contenthandler=lsctables.use_in(ligolw.LIGOLWContentHandler)) segs = event.andsegments( [segs, [[row.start_time, row.end_time] for row in table.get_table(xmldoc, lsctables.SegmentTable.tableName)]] ) ### take intersection of segments ### write intersection to ascii file segfilename = "%s/intersection-%d-%d.seg"%(opts.output_dir, start, end-start) if opts.verbose: print "found %d sec of joint livetime"%(event.livetime(segs)) print "writing : %s"%(segfilename) file_obj = open(segfilename, 'w') for s, e in segs: print >> file_obj, s, e file_obj.close() #------------------------------------------------- ### iterate through segments and estimte PSD for each IFO if opts.verbose: print " processing:" for s, e in segs: while s < e: _e = min(e, s+opts.psd_dur) if opts.verbose:
    logger.info('ERROR: segment generation failed. Skipping this calibration period.')

    if opts.force: ### we require successful training, so raise the error
        logger.info(traceback.format_exc())
        raise e
    else: ### we don't care if any particular training job fails
        gpsstart += stride
        continue

logger.info('finding idq segments')
idqsegs = idq.get_idq_segments(realtimedir, gpsstart-lookback, gpsstart+stride, suffix='.dat')

logger.info('taking intersection between science segments and idq segments')
idqsegs = event.andsegments( [scisegs, idqsegs] )

idqsegs_livetime = event.livetime( idqsegs )

### write segment file
if opts.ignore_science_segments:
    idqseg_path = idq.idqsegascii(output_dir, '', gpsstart-lookback, lookback+stride)
else:
    idqseg_path = idq.idqsegascii(output_dir, '_%s'%dq_name, gpsstart-lookback, lookback+stride)
f = open(idqseg_path, 'w')
for seg in idqsegs:
    print >> f, seg[0], seg[1]
f.close()

#===============================================================================================
# update mappings via uroc files
#===============================================================================================
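### a minimal sketch (hypothetical helper, not part of the pipeline) showing how
### the two-column ascii segment file written above can be read back into the
### [start, end] list-of-lists format the event.* utilities expect
def _load_idqseg_ascii(idqseg_path):
    segs = []
    for line in open(idqseg_path, 'r'):
        line = line.strip()
        if line: ### skip blank lines
            s, e = line.split()
            segs.append([float(s), float(e)])
    return segs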