def extract_ovl_vconfigs(rank_frames, channame, traindir, start, end, metric='eff/dt'):
    """
    Returns a dictionary mapping active OVL veto configurations (vconfigs) to
    the time segments during which they were active.
    does NOT include "none" channel

    rank_frames : iterable of rank *.gwf frame paths
    channame    : channel to read from each frame
    traindir    : directory containing OVL training products ("<traindir>/<t0>_<t1>/ovl/ovl/")
    start, end  : GPS bounds; only samples with start <= t <= end are used
    metric      : 'eff/dt', 'vsig', or 'useP' -- selects the rank conversion

    Returns (vconfigs, configs, column_map) where
      vconfigs   : list of (config_tuple, [segStart, segEnd]) in discovery order
      configs    : dict mapping config_tuple -> fixed segment list clipped to [start, end]
      column_map : index of each field within a config tuple

    Raises ValueError if a unique vetolist file cannot be found, if metric is
    not recognized, or if a config ends up with zero livetime.
    """
    vconfigs = []
    for rnkfr in rank_frames:
        trained, calib = idq.extract_timeseries_ranges(rnkfr)
        # NOTE(review): `classifier` is unused below; the call is kept because it
        # also parses/validates the frame filename -- confirm before removing.
        classifier = idq.extract_fap_name(rnkfr)

        ### find the single vetolist produced by this training range
        vetolist = glob.glob("%s/%d_%d/ovl/ovl/*vetolist.eval" % (traindir, trained[0], trained[1]))
        if len(vetolist) != 1:
            raise ValueError(
                "trouble finding a single vetolist file for : %s" % rnkfr)
        vetolist = vetolist[0]

        ### map each possible rank value to the configs that produce it
        v = event.loadstringtable(vetolist)
        rankmap = {0: [(None, None, None, None, 0, 0)]}  # rank 0 <=> no active config
        for line in v:
            metric_exp = float(line[ovl.vD['metric_exp']])
            if metric == 'eff/dt':
                rnk = ovl.effbydt_to_rank(metric_exp)
            elif metric == 'vsig':
                rnk = ovl.vsig_to_rank(metric_exp)
            elif metric == 'useP':
                rnk = ovl.useP_to_rank(metric_exp)
            else:
                raise ValueError("metric=%s not understood" % metric)
            # was: if rankmap.has_key(rnk): ... else: ...  -- has_key is
            # Python-2-only and the tuple was built twice; setdefault keeps
            # identical behavior in both Python 2 and 3.
            rankmap.setdefault(rnk, []).append((
                line[ovl.vD['vchan']],
                float(line[ovl.vD['vthr']]),
                float(line[ovl.vD['vwin']]),
                metric,
                metric_exp,
                rnk,
            ))
        for key, value in rankmap.items():
            rankmap[key] = tuple(value)  # tuples are hashable -> usable as dict keys below

        ### read the rank timeseries and clip it to [start, end]
        t, ts = idq.combine_gwf([rnkfr], [channame])
        t = t[0]
        truth = (start <= t) * (t <= end)
        t = t[truth]
        ts = ts[0][truth]
        if not len(ts):  # nothing in range -> nothing to do for this frame
            continue

        ### walk the timeseries, closing a segment whenever the active config changes
        configs = rankmap[ts[0]]
        segStart = t[0]
        for T, TS in zip(t, ts):
            if rankmap[TS] != configs:
                vconfigs.append((configs, [segStart, T]))
                segStart = T
                configs = rankmap[TS]
        # final segment extends one sample spacing past the last sample.
        # NOTE(review): assumes at least two samples so t[1]-t[0] is defined -- confirm upstream.
        vconfigs.append((configs, [segStart, T + t[1] - t[0]]))

    ### aggregate segments by config and clip to the requested window
    configs = {}
    for vconfig, seg in vconfigs:
        configs.setdefault(vconfig, []).append(seg)
    for key, value in configs.items():
        value = event.andsegments([event.fixsegments(value), [[start, end]]])
        if event.livetime(value):
            configs[key] = event.fixsegments(value)
        else:
            raise ValueError(
                "somehow picked up a config with zero livetime...")

    return vconfigs, configs, {
        "vchan": 0,
        "vthr": 1,
        "vwin": 2,
        "metric": 3,
        "metric_exp": 4,
        "rank": 5
    }
flavor = config.get(opts.classifier, 'flavor') if config.has_option(opts.classifier, 'plotting_label'): plotting_label = config.get(opts.classifier, 'plotting_label') else: plotting_label = opts.classifier #=================================================================================================== ### Find all FAP files if opts.verbose: print "finding all fap*gwf files" faps = [ fap for fap in idq.get_all_files_in_range( realtimedir, opts.start, opts.end, pad=0, suffix='.gwf') if ('fap' in fap) and ( opts.classifier == idq.extract_fap_name(fap)) and event.livetime( event.andsegments([[idq.extract_start_stop(fap, suffix=".gwf")], idqsegs])) ] ### compute total time covered #T = event.livetime( [idq.extract_start_stop(fap, suffix='.gwf') for fap in faps] )*1.0 T = event.livetime(idqsegs) * 1.0 ### combine timeseries and generate segments if opts.verbose: print "generating segments from %d fap files" % (len(faps)) segs = dict((fapThr, [[], 1.0]) for fapThr in opts.FAPthr) t, ts = idq.combine_gwf(faps, [fap_channame]) for t, ts in zip(t, ts):
#=================================================================================================== # get all *.gwf files in range if opts.verbose: print "Finding relevant *.gwf files" rank_filenames = [] fap_filenames = [] all_files = idq.get_all_files_in_range(realtimedir, opts.plotting_gps_start, opts.plotting_gps_end, pad=0, suffix='.gwf') for filename in all_files: if opts.classifier == idq.extract_fap_name( filename): # and ifo in filename: ### this last bit not needed? if 'rank' in filename: rank_filenames.append(filename) if 'fap' in filename: fap_filenames.append(filename) rank_filenames.sort() fap_filenames.sort() if (not rank_filenames) or ( not fap_filenames): # we couldn't find either rank or fap files # exit gracefully if opts.verbose: print "no iDQ timeseries for %s at %s" % (opts.classifier, ifo) if not opts.skip_gracedb_upload: gracedb.writeLog(opts.gracedb_id,
rank_channame = idq.channame(ifo, opts.classifier, "%s_rank"%tag) fap_channame = idq.channame(ifo, opts.classifier, "%s_fap"%tag) fapUL_channame = idq.channame(ifo, opts.classifier, "%s_fapUL"%tag) flavor = config.get(opts.classifier, 'flavor') if config.has_option(opts.classifier, 'plotting_label'): plotting_label = config.get(opts.classifier, 'plotting_label') else: plotting_label = opts.classifier #=================================================================================================== ### Find all FAP files if opts.verbose: print "finding all fap*gwf files" faps = [fap for fap in idq.get_all_files_in_range( realtimedir, opts.start, opts.end, pad=0, suffix='.gwf') if ('fap' in fap) and (opts.classifier==idq.extract_fap_name( fap )) and event.livetime(event.andsegments([[idq.extract_start_stop(fap, suffix=".gwf")], idqsegs])) ] ### compute total time covered #T = event.livetime( [idq.extract_start_stop(fap, suffix='.gwf') for fap in faps] )*1.0 T = event.livetime( idqsegs )*1.0 ### combine timeseries and generate segments if opts.verbose: print "generating segments from %d fap files"%(len(faps)) segs = dict( (fapThr, [[], 1.0]) for fapThr in opts.FAPthr ) t, ts = idq.combine_gwf(faps, [fap_channame]) for t, ts in zip(t, ts): t, ts = idq.timeseries_in_segments( t, ts, idqsegs ) for fapThr in opts.FAPthr:
] if opts.mode == "npy": ### need rank files ### find all *rank*npy.gz files, bin them according to classifier logger.info(' finding all *rank*.npy.gz files') ranksD = defaultdict(list) for rank in [ rank for rank in idq.get_all_files_in_range(realtimedir, gpsstart - lookback, gpsstart + stride, pad=0, suffix='.npy.gz') if "rank" in rank ]: ranksD[idq.extract_fap_name(rank)].append( rank) ### should just work... ### throw away files we will never need for key in ranksD.keys(): if key not in classifiers: ### throw away unwanted files ranksD.pop(key) else: ### keep only files that overlap with scisegs ranksD[key] = [ rank for rank in ranksD[key] if event.livetime( event.andsegments([ idqsegs, [idq.extract_start_stop(rank, suffix='.npy.gz')] ])) ]
def extract_ovl_vconfigs( rank_frames, channame, traindir, start, end, metric='eff/dt' ):
    """
    Returns a dictionary mapping active OVL veto configurations (vconfigs) to
    the time segments during which they were active.
    does NOT include "none" channel

    rank_frames : iterable of rank *.gwf frame paths
    channame    : channel to read from each frame
    traindir    : directory containing OVL training products ("<traindir>/<t0>_<t1>/ovl/ovl/")
    start, end  : GPS bounds; only samples with start <= t <= end are used
    metric      : 'eff/dt', 'vsig', or 'useP' -- selects the rank conversion

    Returns (vconfigs, configs, column_map) where
      vconfigs   : list of (config_tuple, [segStart, segEnd]) in discovery order
      configs    : dict mapping config_tuple -> fixed segment list clipped to [start, end]
      column_map : index of each field within a config tuple

    Raises ValueError if a unique vetolist file cannot be found, if metric is
    not recognized, or if a config ends up with zero livetime.
    """
    vconfigs = []
    for rnkfr in rank_frames:
        trained, calib = idq.extract_timeseries_ranges( rnkfr )
        # NOTE(review): `classifier` is unused below; the call is kept because it
        # also parses/validates the frame filename -- confirm before removing.
        classifier = idq.extract_fap_name( rnkfr )

        ### find the single vetolist produced by this training range
        vetolist = glob.glob( "%s/%d_%d/ovl/ovl/*vetolist.eval"%(traindir, trained[0], trained[1]) )
        if len(vetolist) != 1:
            raise ValueError( "trouble finding a single vetolist file for : %s"%rnkfr )
        vetolist = vetolist[0]

        ### map each possible rank value to the configs that produce it
        v = event.loadstringtable( vetolist )
        rankmap = { 0:[(None, None, None, None, 0, 0)] }  # rank 0 <=> no active config
        for line in v:
            metric_exp = float(line[ovl.vD['metric_exp']])
            if metric == 'eff/dt':
                rnk = ovl.effbydt_to_rank( metric_exp )
            elif metric == 'vsig':
                rnk = ovl.vsig_to_rank( metric_exp )
            elif metric == 'useP':
                rnk = ovl.useP_to_rank( metric_exp )
            else:
                raise ValueError("metric=%s not understood"%metric)
            # was: if rankmap.has_key(rnk): ... else: ...  -- has_key is
            # Python-2-only and the tuple was built twice; setdefault keeps
            # identical behavior in both Python 2 and 3.
            rankmap.setdefault(rnk, []).append( (line[ovl.vD['vchan']],
                                                 float(line[ovl.vD['vthr']]),
                                                 float(line[ovl.vD['vwin']]),
                                                 metric,
                                                 metric_exp,
                                                 rnk ) )
        for key, value in rankmap.items():
            rankmap[key] = tuple(value)  # tuples are hashable -> usable as dict keys below

        ### read the rank timeseries and clip it to [start, end]
        t, ts = idq.combine_gwf( [rnkfr], [channame])
        t = t[0]
        truth = (start <= t)*(t <= end)
        t = t[truth]
        ts = ts[0][truth]
        if not len(ts):  # nothing in range -> nothing to do for this frame
            continue

        ### walk the timeseries, closing a segment whenever the active config changes
        configs = rankmap[ts[0]]
        segStart = t[0]
        for T, TS in zip(t, ts):
            if rankmap[TS] != configs:
                vconfigs.append( (configs, [segStart, T] ) )
                segStart = T
                configs = rankmap[TS]
        # final segment extends one sample spacing past the last sample.
        # NOTE(review): assumes at least two samples so t[1]-t[0] is defined -- confirm upstream.
        vconfigs.append( (configs, [segStart, T+t[1]-t[0]] ) )

    ### aggregate segments by config and clip to the requested window
    configs = {}
    for vconfig, seg in vconfigs:
        configs.setdefault(vconfig, []).append( seg )
    for key, value in configs.items():
        value = event.andsegments( [event.fixsegments( value ), [[start,end]] ] )
        if event.livetime( value ):
            configs[key] = event.fixsegments( value )
        else:
            raise ValueError("somehow picked up a config with zero livetime...")

    return vconfigs, configs, {"vchan":0, "vthr":1, "vwin":2, "metric":3, "metric_exp":4, "rank":5}
### bin *.dat files by classifier name
for dat in idq.get_all_files_in_range(realtimedir, gpsstart-lookback, gpsstart+stride, pad=0, suffix='.dat' ):
    datsD[idq.extract_dat_name( dat )].append( dat )

### throw away any un-needed files
for key in datsD.keys():
    if key not in classifiers:
        datsD.pop(key)
    else: ### throw out files that don't contain any science time
        datsD[key] = [ dat for dat in datsD[key]
                       if event.livetime(event.andsegments([idqsegs, [idq.extract_start_stop(dat, suffix='.dat')]]))
                       ]

if opts.mode=="npy": ### need rank files
    ### find all *rank*npy.gz files, bin them according to classifier
    logger.info(' finding all *rank*.npy.gz files')
    ranksD = defaultdict( list )
    for rank in [rank for rank in idq.get_all_files_in_range(realtimedir, gpsstart-lookback, gpsstart+stride, pad=0, suffix='.npy.gz') if "rank" in rank]:
        ranksD[idq.extract_fap_name( rank )].append( rank ) ### should just work...

    ### throw away files we will never need
    for key in ranksD.keys():
        if key not in classifiers: ### throw away unwanted files
            ranksD.pop(key)
        else: ### keep only files that overlap with scisegs
            ranksD[key] = [ rank for rank in ranksD[key]
                            if event.livetime(event.andsegments([idqsegs, [idq.extract_start_stop(rank, suffix='.npy.gz')]]))
                            ]

#====================
# update uroc for each classifier
#====================
urocs = {} ### stores uroc files for kde estimation
for classifier in classifiers:
    ### write list of dats to cache file
    cache = idq.cache(output_dir, classifier, "_datcache%s"%usertag)
rank_channame = idq.channame(ifo, opts.classifier, "%s_rank"%tag) fap_channame = idq.channame(ifo, opts.classifier, "%s_fap"%tag) fapUL_channame = idq.channame(ifo, opts.classifier, "%s_fapUL"%tag) #=================================================================================================== # get all *.gwf files in range if opts.verbose: print "Finding relevant *.gwf files" rank_filenames = [] fap_filenames = [] all_files = idq.get_all_files_in_range(realtimedir, opts.plotting_gps_start, opts.plotting_gps_end, pad=0, suffix='.gwf') for filename in all_files: if opts.classifier == idq.extract_fap_name(filename): # and ifo in filename: ### this last bit not needed? if 'rank' in filename: rank_filenames.append(filename) if 'fap' in filename: fap_filenames.append(filename) rank_filenames.sort() fap_filenames.sort() if (not rank_filenames) or (not fap_filenames): # we couldn't find either rank or fap files # exit gracefully if opts.verbose: print "no iDQ timeseries for %s at %s"%(opts.classifier, ifo) if not opts.skip_gracedb_upload: gracedb.writeLog(opts.gracedb_id, message="No iDQ timeseries for %s at %s"%(opts.classifier, ifo)) sys.exit(0)
### write the analysis segments to an ascii file for book-keeping
if opts.ignore_science_segments:
    idqseg_path = idq.idqsegascii(opts.output_dir, '', startgps, stride)
else:
    idqseg_path = idq.idqsegascii(opts.output_dir, '_%s'%dq_name, startgps, stride)
f = open(idqseg_path, 'w')
for seg in idqsegs:
    print >> f, seg[0], seg[1]
f.close()

#========================
# go findeth the frame data
#========================
logger.info(' finding all *fap*.gwf files')
fapsD = defaultdict( list )
for fap in [fap for fap in idq.get_all_files_in_range(realtimedir, lookup_startgps, lookup_endgps, pad=0, suffix='.gwf') if "fap" in fap]:
    fapsD[idq.extract_fap_name( fap )].append( fap )

### throw away files we will never need
for key in fapsD.keys():
    if key not in opts.classifier: ### throw away unwanted files
        fapsD.pop(key)
    else: ### keep only files that overlap with scisegs
        fapsD[key] = [ fap for fap in fapsD[key]
                       if event.livetime(event.andsegments([idqsegs, [idq.extract_start_stop(fap, suffix='.gwf')]]))
                       ]

#========================
# iterate through classifiers -> generate segments
#========================

### set up xml document
from glue.ligolw import ligolw
from glue.ligolw import utils as ligolw_utils