def test_read_write_cache(self):
    cache = self.make_cache()[0]
    with tempfile.NamedTemporaryFile() as f:
        io_cache.write_cache(cache, f)
        f.seek(0)
        # read from fileobj
        c2 = io_cache.read_cache(f)
        assert cache == c2
        # write with file name
        io_cache.write_cache(cache, f.name)
        # read from file name
        c3 = io_cache.read_cache(f.name)
        assert cache == c3
def test_write_lal_cache(tmpdir):
    cache = [
        "/test/path/X-TEST-0-1.txt",
        "/test/path/X-TEST-2-3.txt",
    ]
    target = tmpdir.join("cache.lcf")
    utils.write_lal_cache(str(target), cache)
    assert read_cache(str(target)) == cache
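
# Illustrative sketch (an assumption, not part of the test suite): the cache
# entries used above appear to follow the LIGO T050017 naming convention,
# <OBS>-<TAG>-<GPSSTART>-<DURATION>.<ext>, which is what lets helpers such as
# ``file_segment`` (used later in this codebase) map a file name onto a GPS
# segment. A simplified stand-in parser might look like this:


def _segment_from_t050017(path):
    """Return the (start, end) GPS span encoded in a T050017-style file name.

    This hypothetical helper assumes a well-formed name and is only meant to
    illustrate the convention; it is not the real ``file_segment`` utility.
    """
    import os
    stem = os.path.splitext(os.path.basename(path))[0]
    _, _, start, duration = stem.rsplit('-', 3)
    return float(start), float(start) + float(duration)


# e.g. _segment_from_t050017("/test/path/X-TEST-0-1.txt") == (0.0, 1.0)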
def main(args=None):
    """Run the cache_events tool
    """
    parser = create_parser()
    args = parser.parse_args(args=args)
    ifo = args.ifo
    start = int(args.gpsstart)
    end = int(args.gpsend)
    duration = end - start
    LOGGER.info("-- Welcome to Hveto --")
    LOGGER.info("GPS start time: %d" % start)
    LOGGER.info("GPS end time: %d" % end)
    LOGGER.info("Interferometer: %s" % ifo)

    # -- initialisation -------------------------------

    # read configuration
    cp = config.HvetoConfigParser(ifo=args.ifo)
    cp.read(map(str, args.config_file))
    LOGGER.info("Parsed configuration file(s)")

    # format output directory
    outdir = args.output_directory
    outdir.mkdir(parents=True, exist_ok=True)
    LOGGER.info("Working directory: {}".format(outdir))
    trigdir = outdir / 'triggers'
    trigdir.mkdir(parents=True, exist_ok=True)

    # get segments
    aflag = cp.get('segments', 'analysis-flag')
    url = cp.get('segments', 'url')
    padding = cp.getfloats('segments', 'padding')
    if args.analysis_segments:
        segs_ = DataQualityDict.read(args.analysis_segments, gpstype=float)
        analysis = segs_[aflag]
        span = SegmentList([Segment(start, end)])
        analysis.active &= span
        analysis.known &= span
        analysis.coalesce()
        LOGGER.debug("Segments read from disk")
    else:
        analysis = DataQualityFlag.query(aflag, start, end, url=url)
        LOGGER.debug("Segments recovered from %s" % url)
    analysis.pad(*padding)
    livetime = int(abs(analysis.active))
    livetimepc = livetime / duration * 100.
    LOGGER.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime"
                % (len(analysis.active), aflag, livetime, livetimepc))

    snrs = cp.getfloats('hveto', 'snr-thresholds')
    minsnr = min(snrs)

    # -- utility methods ------------------------------

    def create_path(channel):
        ifo, name = channel.split(':', 1)
        name = name.replace('-', '_')
        return trigdir / "{}-{}-{}-{}.h5".format(ifo, name, start, duration)

    def read_and_cache_events(channel, etg, cache=None, trigfind_kw={},
                              **read_kw):
        cfile = create_path(channel)
        # read existing cached triggers and work out new segments to query
        if args.append and cfile.is_file():
            previous = DataQualityFlag.read(
                str(cfile),
                path='segments',
                format='hdf5',
            ).coalesce()
            new = analysis - previous
        else:
            new = analysis.copy()
        # get cache of files
        if cache is None:
            cache = find_trigger_files(channel, etg, new.active,
                                       **trigfind_kw)
        else:
            cache = list(filter(
                lambda e: new.active.intersects_segment(file_segment(e)),
                cache,
            ))
        # restrict 'active' segments to when we have data
        try:
            new.active &= cache_segments(cache)
        except IndexError:
            new.active = type(new.active)()
        # find new triggers
        try:
            trigs = get_triggers(channel, etg, new.active, cache=cache,
                                 raw=True, **read_kw)
        # catch error and continue
        except ValueError as e:
            warnings.warn('%s: %s' % (type(e).__name__, str(e)))
        else:
            path = write_events(channel, trigs, new)
            try:
                return path, len(trigs)
            except TypeError:  # None
                return

    def write_events(channel, tab, segments):
        """Write events to file with a given filename
        """
        # get filename
        path = create_path(channel)
        h5f = h5py.File(str(path), 'a')
        # read existing table from file
        try:
            old = tab.read(h5f["triggers"], format="hdf5")
        except KeyError:
            pass
        else:
            tab = vstack([old, tab])
        # append event table
        tab.write(h5f, path="triggers", append=True, overwrite=True)
        # write segments
        try:
            oldsegs = DataQualityFlag.read(h5f, path="segments",
                                           format="hdf5")
        except KeyError:
            pass
        else:
            segments = oldsegs + segments
        segments.write(h5f, path="segments", append=True, overwrite=True)
        # write file to disk
        h5f.close()
        return path

    # -- load channels --------------------------------

    # get primary channel name
    pchannel = cp.get('primary', 'channel')

    # read auxiliary cache
    if args.auxiliary_cache is not None:
        acache = [e for c in args.auxiliary_cache for e in read_cache(str(c))]
    else:
        acache = None

    # load auxiliary channels
    auxetg = cp.get('auxiliary', 'trigger-generator')
    auxfreq = cp.getfloats('auxiliary', 'frequency-range')
    try:
        auxchannels = cp.get('auxiliary', 'channels').strip('\n').split('\n')
    except config.configparser.NoOptionError:
        auxchannels = find_auxiliary_channels(auxetg, start, ifo=args.ifo,
                                              cache=acache)

    # load unsafe channels list
    _unsafe = cp.get('safety', 'unsafe-channels')
    if os.path.isfile(_unsafe):  # from file
        unsafe = set()
        with open(_unsafe, 'rb') as f:
            for c in f.read().rstrip('\n').split('\n'):
                if c.startswith('%(IFO)s'):
                    unsafe.add(c.replace('%(IFO)s', ifo))
                elif not c.startswith('%s:' % ifo):
                    unsafe.add('%s:%s' % (ifo, c))
                else:
                    unsafe.add(c)
    else:  # or from line-separated list
        unsafe = set(_unsafe.strip('\n').split('\n'))
    unsafe.add(pchannel)
    cp.set('safety', 'unsafe-channels', '\n'.join(sorted(unsafe)))
    LOGGER.debug("Read list of %d unsafe channels" % len(unsafe))

    # remove duplicates
    auxchannels = sorted(set(auxchannels))
    LOGGER.debug("Read list of %d auxiliary channels" % len(auxchannels))

    # remove unsafe channels
    nunsafe = 0
    for i in range(len(auxchannels) - 1, -1, -1):
        if auxchannels[i] in unsafe:
            LOGGER.warning("Auxiliary channel %r identified as unsafe and has "
                           "been removed" % auxchannels[i])
            auxchannels.pop(i)
            nunsafe += 1
    LOGGER.debug("%d auxiliary channels identified as unsafe" % nunsafe)
    naux = len(auxchannels)
    LOGGER.info("Identified %d auxiliary channels to process" % naux)

    # -- load primary triggers -------------------------

    LOGGER.info("Reading events for primary channel...")

    # read primary cache
    if args.primary_cache is not None:
        pcache = [e for c in args.primary_cache for e in read_cache(str(c))]
    else:
        pcache = None

    # get primary params
    petg = cp.get('primary', 'trigger-generator')
    psnr = cp.getfloat('primary', 'snr-threshold')
    pfreq = cp.getfloats('primary', 'frequency-range')
    preadkw = cp.getparams('primary', 'read-')
    ptrigfindkw = cp.getparams('primary', 'trigfind-')

    # load primary triggers
    out = read_and_cache_events(pchannel, petg, snr=psnr, frange=pfreq,
                                cache=pcache, trigfind_kw=ptrigfindkw,
                                **preadkw)
    try:
        e, n = out
    except TypeError:
        e = None
        n = 0
    if n:
        LOGGER.info("Cached %d new events for %s" % (n, pchannel))
    elif args.append and e.is_file():
        LOGGER.info("Cached 0 new events for %s" % pchannel)
    else:
        message = "No events found for %r in %d seconds of livetime" % (
            pchannel, livetime)
        LOGGER.critical(message)

    # write primary to local cache
    pname = trigdir / '{}-HVETO_PRIMARY_CACHE-{}-{}.lcf'.format(
        ifo, start, duration,
    )
    write_lal_cache(str(pname), [e])
    LOGGER.info('Primary cache written to {}'.format(pname))

    # -- load auxiliary triggers -----------------------

    LOGGER.info("Reading triggers for aux channels...")
    counter = multiprocessing.Value('i', 0)

    areadkw = cp.getparams('auxiliary', 'read-')
    atrigfindkw = cp.getparams('auxiliary', 'trigfind-')

    def read_and_write_aux_triggers(channel):
        if acache is None:
            auxcache = None
        else:
            ifo, name = channel.split(':')
            match = "{}-{}".format(ifo, name.replace('-', '_'))
            auxcache = [e for e in acache if Path(e).name.startswith(match)]

        out = read_and_cache_events(channel, auxetg, cache=auxcache,
                                    snr=minsnr, frange=auxfreq,
                                    trigfind_kw=atrigfindkw, **areadkw)
        try:
            e, n = out
        except TypeError:
            e = None
            n = 0

        # log result of load
        with counter.get_lock():
            counter.value += 1
            tag = '[%d/%d]' % (counter.value, naux)
            if e is None:  # something went wrong
                LOGGER.critical(" %s Failed to read events for %s"
                                % (tag, channel))
            else:  # either read events or nothing new
                LOGGER.debug(" %s Cached %d new events for %s"
                             % (tag, n, channel))
        return e

    # map with multiprocessing
    if args.nproc > 1:
        pool = multiprocessing.Pool(processes=args.nproc)
        results = pool.map(read_and_write_aux_triggers, auxchannels)
        pool.close()
    # map without multiprocessing
    else:
        results = map(read_and_write_aux_triggers, auxchannels)

    acache = [x for x in results if x is not None]
    aname = trigdir / '{}-HVETO_AUXILIARY_CACHE-{}-{}.lcf'.format(
        ifo, start, duration,
    )
    write_lal_cache(str(aname), acache)
    LOGGER.info('Auxiliary cache written to {}'.format(aname))

    # -- finish ----------------------------------------

    LOGGER.info('Done, you can use these cache files in an hveto analysis by '
                'passing the following arguments:\n\n--primary-cache {} '
                '--auxiliary-cache {}\n'.format(pname, aname))
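
# Illustrative sketch (a hypothetical helper, not part of the original tool):
# the per-channel HDF5 files produced by ``write_events`` above hold the
# event table under the HDF5 path "triggers" and the analysed segments under
# "segments", mirroring the read calls in ``read_and_cache_events``.  A
# cached file could therefore be inspected afterwards roughly like this:


def _inspect_cached_events(path):
    """Read back one cached trigger file written by ``write_events``.

    Returns ``(events, segments)``; assumes the same astropy/gwpy readers
    used elsewhere in this module.
    """
    from astropy.table import Table
    from gwpy.segments import DataQualityFlag
    events = Table.read(str(path), path="triggers", format="hdf5")
    segs = DataQualityFlag.read(str(path), path="segments", format="hdf5")
    return events, segs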
def main(args=None): """Run the hveto command-line interface """ # declare global variables # this is needed for multiprocessing utilities global acache, analysis, areadkw, atrigfindkw, auxiliary, auxetg global auxfreq, counter, livetime, minsnr, naux, pchannel, primary global rnd, snrs, windows # parse command-line parser = create_parser() args = parser.parse_args(args=args) ifo = args.ifo start = int(args.gpsstart) end = int(args.gpsend) duration = end - start # log startup LOGGER.info("-- Welcome to Hveto --") LOGGER.info("GPS start time: %d" % start) LOGGER.info("GPS end time: %d" % end) LOGGER.info("Interferometer: %s" % ifo) # -- initialisation ------------------------- # read configuration cp = config.HvetoConfigParser(ifo=ifo) cp.read(args.config_file) LOGGER.info("Parsed configuration file(s)") # format output directory outdir = _abs_path(args.output_directory) if not os.path.isdir(outdir): os.makedirs(outdir) os.chdir(outdir) LOGGER.info("Working directory: %s" % outdir) segdir = 'segments' plotdir = 'plots' trigdir = 'triggers' omegadir = 'scans' for d in [segdir, plotdir, trigdir, omegadir]: if not os.path.isdir(d): os.makedirs(d) # prepare html variables htmlv = { 'title': '%s Hveto | %d-%d' % (ifo, start, end), 'config': None, 'prog': PROG, 'context': ifo.lower(), } # get segments aflag = cp.get('segments', 'analysis-flag') url = cp.get('segments', 'url') padding = tuple(cp.getfloats('segments', 'padding')) if args.analysis_segments: segs_ = DataQualityDict.read(args.analysis_segments, gpstype=float) analysis = segs_[aflag] span = SegmentList([Segment(start, end)]) analysis.active &= span analysis.known &= span analysis.coalesce() LOGGER.debug("Segments read from disk") else: analysis = DataQualityFlag.query(aflag, start, end, url=url) LOGGER.debug("Segments recovered from %s" % url) if padding != (0, 0): mindur = padding[0] - padding[1] analysis.active = type(analysis.active)([s for s in analysis.active if abs(s) >= mindur]) analysis.pad(*padding, inplace=True) LOGGER.debug("Padding %s applied" % str(padding)) livetime = int(abs(analysis.active)) livetimepc = livetime / duration * 100. LOGGER.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime" % (len(analysis.active), aflag, livetime, livetimepc)) # apply vetoes from veto-definer file try: vetofile = cp.get('segments', 'veto-definer-file') except configparser.NoOptionError: vetofile = None else: try: categories = cp.getfloats('segments', 'veto-definer-categories') except configparser.NoOptionError: categories = None # read file vdf = read_veto_definer_file(vetofile, start=start, end=end, ifo=ifo) LOGGER.debug("Read veto-definer file from %s" % vetofile) # get vetoes from segdb vdf.populate(source=url, segments=analysis.active, on_error='warn') # coalesce flags from chosen categories vetoes = DataQualityFlag('%s:VDF-VETOES:1' % ifo) nflags = 0 for flag in vdf: if not categories or vdf[flag].category in categories: vetoes += vdf[flag] nflags += 1 try: deadtime = int(abs(vetoes.active)) / int(abs(vetoes.known)) * 100 except ZeroDivisionError: deadtime = 0 LOGGER.debug("Coalesced %ss (%.2f%%) of deadtime from %d veto flags" % (abs(vetoes.active), deadtime, nflags)) # apply to analysis segments analysis -= vetoes LOGGER.debug("Applied vetoes from veto-definer file") livetime = int(abs(analysis.active)) livetimepc = livetime / duration * 100. 
LOGGER.info("%ss (%.2f%%) livetime remaining after vetoes" % (livetime, livetimepc)) snrs = cp.getfloats('hveto', 'snr-thresholds') minsnr = min(snrs) windows = cp.getfloats('hveto', 'time-windows') # record all segments segments = DataQualityDict() segments[analysis.name] = analysis # -- load channels -------------------------- # get primary channel name pchannel = cp.get('primary', 'channel') # read auxiliary cache if args.auxiliary_cache is not None: acache = read_cache(args.auxiliary_cache) else: acache = None # load auxiliary channels auxetg = cp.get('auxiliary', 'trigger-generator') auxfreq = cp.getfloats('auxiliary', 'frequency-range') try: auxchannels = cp.get('auxiliary', 'channels').strip('\n').split('\n') except config.configparser.NoOptionError: auxchannels = find_auxiliary_channels(auxetg, (start, end), ifo=ifo, cache=acache) cp.set('auxiliary', 'channels', '\n'.join(auxchannels)) LOGGER.debug("Auto-discovered %d " "auxiliary channels" % len(auxchannels)) else: auxchannels = sorted(set(auxchannels)) LOGGER.debug("Read list of %d auxiliary channels" % len(auxchannels)) # load unsafe channels list _unsafe = cp.get('safety', 'unsafe-channels') if os.path.isfile(_unsafe): # from file unsafe = set() with open(_unsafe, 'rb') as f: for c in f.read().rstrip('\n').split('\n'): if c.startswith('%(IFO)s'): unsafe.add(c.replace('%(IFO)s', ifo)) elif not c.startswith('%s:' % ifo): unsafe.add('%s:%s' % (ifo, c)) else: unsafe.add(c) else: # or from line-seprated list unsafe = set(_unsafe.strip('\n').split('\n')) unsafe.add(pchannel) cp.set('safety', 'unsafe-channels', '\n'.join(sorted(unsafe))) LOGGER.debug("Read list of %d unsafe channels" % len(unsafe)) # remove unsafe channels nunsafe = 0 for i in range(len(auxchannels) - 1, -1, -1): if auxchannels[i] in unsafe: LOGGER.warning("Auxiliary channel %r identified as unsafe and has " "been removed" % auxchannels[i]) auxchannels.pop(i) nunsafe += 1 LOGGER.debug("%d auxiliary channels identified as unsafe" % nunsafe) naux = len(auxchannels) LOGGER.info("Identified %d auxiliary channels to process" % naux) # record INI file in output HTML directory inifile = '%s-HVETO_CONFIGURATION-%d-%d.ini' % (ifo, start, duration) if os.path.isfile(inifile) and any( os.path.samefile(inifile, x) for x in args.config_file): LOGGER.debug("Cannot write INI file to %s, file was given as input") else: with open(inifile, 'w') as f: cp.write(f) LOGGER.info("Configuration recorded as %s" % inifile) htmlv['config'] = inifile # -- load primary triggers ------------------ # read primary cache if args.primary_cache is not None: pcache = read_cache(args.primary_cache) else: pcache = None # load primary triggers petg = cp.get('primary', 'trigger-generator') psnr = cp.getfloat('primary', 'snr-threshold') pfreq = cp.getfloats('primary', 'frequency-range') preadkw = cp.getparams('primary', 'read-') if pcache is not None: # auto-detect the file format LOGGER.debug('Unsetting the primary trigger file format') preadkw['format'] = None preadkw['path'] = 'triggers' ptrigfindkw = cp.getparams('primary', 'trigfind-') primary = get_triggers(pchannel, petg, analysis.active, snr=psnr, frange=pfreq, cache=pcache, nproc=args.nproc, trigfind_kwargs=ptrigfindkw, **preadkw) fcol, scol = primary.dtype.names[1:3] if len(primary): LOGGER.info("Read %d events for %s" % (len(primary), pchannel)) else: message = "No events found for %r in %d seconds of livetime" % ( pchannel, livetime) LOGGER.critical(message) # cluster primary triggers clusterkwargs = cp.getparams('primary', 'cluster-') if 
clusterkwargs: primary = primary.cluster(**clusterkwargs) LOGGER.info("%d primary events remain after clustering over %s" % (len(primary), clusterkwargs['rank'])) # -- bail out early ------------------------- # the bail out is done here so that we can at least generate the eventual # configuration file, mainly for HTML purposes # no segments if livetime == 0: message = ("No active segments found for analysis flag %r in interval " "[%d, %d)" % (aflag, start, end)) LOGGER.critical(message) htmlv['context'] = 'info' index = html.write_null_page(ifo, start, end, message, **htmlv) LOGGER.info("HTML report written to %s" % index) sys.exit(0) # no primary triggers if len(primary) == 0: htmlv['context'] = 'danger' index = html.write_null_page(ifo, start, end, message, **htmlv) LOGGER.info("HTML report written to %s" % index) sys.exit(0) # otherwise write all primary triggers to ASCII trigfile = os.path.join( trigdir, '%s-HVETO_RAW_TRIGS_ROUND_0-%d-%d.txt' % (ifo, start, duration), ) primary.write(trigfile, format='ascii', overwrite=True) # -- load auxiliary triggers ---------------- LOGGER.info("Reading triggers for aux channels...") counter = multiprocessing.Value('i', 0) areadkw = cp.getparams('auxiliary', 'read-') if acache is not None: # auto-detect the file format LOGGER.debug('Unsetting the auxiliary trigger file format') areadkw['format'] = None areadkw['path'] = 'triggers' atrigfindkw = cp.getparams('auxiliary', 'trigfind-') # map with multiprocessing if args.nproc > 1: pool = multiprocessing.Pool(processes=args.nproc) results = pool.map(_get_aux_triggers, auxchannels) pool.close() # map without multiprocessing else: results = map(_get_aux_triggers, auxchannels) LOGGER.info("All aux events loaded") auxiliary = dict(x for x in results if x is not None) auxchannels = sorted(auxiliary.keys()) chanfile = '%s-HVETO_CHANNEL_LIST-%d-%d.txt' % (ifo, start, duration) with open(chanfile, 'w') as f: for chan in auxchannels: print(chan, file=f) LOGGER.info("Recorded list of valid auxiliary channels in %s" % chanfile) # -- execute hveto analysis ----------------- minsig = cp.getfloat('hveto', 'minimum-significance') pevents = [primary] pvetoed = [] auxfcol, auxscol = auxiliary[auxchannels[0]].dtype.names[1:3] slabel = plot.get_column_label(scol) flabel = plot.get_column_label(fcol) auxslabel = plot.get_column_label(auxscol) auxflabel = plot.get_column_label(auxfcol) rounds = [] rnd = core.HvetoRound(1, pchannel, rank=scol) rnd.segments = analysis.active while True: LOGGER.info("-- Processing round %d --" % rnd.n) # write segments for this round segfile = os.path.join( segdir, '%s-HVETO_ANALYSIS_SEGS_ROUND_%d-%d-%d.txt' % (ifo, rnd.n, start, duration)) write_ascii_segments(segfile, rnd.segments) # calculate significances for this round if args.nproc > 1: # multiprocessing # separate channel list into chunks and process each chunk pool = multiprocessing.Pool( processes=min(args.nproc, len(auxiliary.keys()))) chunks = utils.channel_groups(list(auxiliary.keys()), args.nproc) results = pool.map(_find_max_significance, chunks) pool.close() winners, sigsets = zip(*results) # find winner of chunk winners winner = sorted(winners, key=lambda w: w.significance)[-1] # flatten sets of significances into one list newsignificances = sigsets[0] for subdict in sigsets[1:]: newsignificances.update(subdict) else: # single process winner, newsignificances = core.find_max_significance( primary, auxiliary, pchannel, snrs, windows, rnd.livetime) LOGGER.info("Round %d winner: %s" % (rnd.n, winner.name)) # plot significance 
drop here for the last round # only now do we actually have the new data to # calculate significance drop if rnd.n > 1: svg = (pngname % 'SIG_DROP').replace('.png', '.svg') # noqa: F821 plot.significance_drop( svg, oldsignificances, newsignificances, # noqa: F821 title=' | '.join([title, subtitle]), # noqa: F821 bbox_inches='tight') LOGGER.debug("Figure written to %s" % svg) svg = FancyPlot(svg, caption=plot.ROUND_CAPTION['SIG_DROP']) rounds[-1].plots.append(svg) oldsignificances = newsignificances # noqa: F841 # break out of the loop if the significance is below stopping point if winner.significance < minsig: LOGGER.info("Maximum signifiance below stopping point") LOGGER.debug(" (%.2f < %.2f)" % (winner.significance, minsig)) LOGGER.info("-- Rounds complete! --") break # work out the vetoes for this round allaux = auxiliary[winner.name][ auxiliary[winner.name][auxscol] >= winner.snr] winner.events = allaux coincs = allaux[core.find_coincidences(allaux['time'], primary['time'], dt=winner.window)] rnd.vetoes = winner.get_segments(allaux['time']) flag = DataQualityFlag( '%s:HVT-ROUND_%d:1' % (ifo, rnd.n), active=rnd.vetoes, known=rnd.segments, description="winner=%s, window=%s, snr=%s" % ( winner.name, winner.window, winner.snr)) segments[flag.name] = flag LOGGER.debug("Generated veto segments for round %d" % rnd.n) # link events before veto for plotting before = primary beforeaux = auxiliary[winner.name] # apply vetoes to primary primary, vetoed = core.veto(primary, rnd.vetoes) pevents.append(primary) pvetoed.append(vetoed) LOGGER.debug("Applied vetoes to primary") # record results rnd.winner = winner rnd.efficiency = (len(vetoed), len(primary) + len(vetoed)) rnd.use_percentage = (len(coincs), len(winner.events)) if rnd.n > 1: rnd.cum_efficiency = ( len(vetoed) + rounds[-1].cum_efficiency[0], rounds[0].efficiency[1]) rnd.cum_deadtime = ( rnd.deadtime[0] + rounds[-1].cum_deadtime[0], livetime) else: rnd.cum_efficiency = rnd.efficiency rnd.cum_deadtime = rnd.deadtime # apply vetoes to auxiliary if args.nproc > 1: # multiprocess # separate channel list into chunks and process each chunk pool = multiprocessing.Pool( processes=min(args.nproc, len(auxiliary.keys()))) chunks = utils.channel_groups(list(auxiliary.keys()), args.nproc) results = pool.map(_veto, chunks) pool.close() auxiliary = results[0] for subdict in results[1:]: auxiliary.update(subdict) else: # single process auxiliary = core.veto_all(auxiliary, rnd.vetoes) LOGGER.debug("Applied vetoes to auxiliary channels") # log results LOGGER.info("""Results for round %d:\n\n winner : %s significance : %s mu : %s snr : %s dt : %s use_percentage : %s efficiency : %s deadtime : %s cum. efficiency : %s cum. 
deadtime : %s\n\n""" % ( rnd.n, rnd.winner.name, rnd.winner.significance, rnd.winner.mu, rnd.winner.snr, rnd.winner.window, rnd.use_percentage, rnd.efficiency, rnd.deadtime, rnd.cum_efficiency, rnd.cum_deadtime)) # write segments segfile = os.path.join( segdir, '%s-HVETO_VETO_SEGS_ROUND_%d-%d-%d.txt' % ( ifo, rnd.n, start, duration)) write_ascii_segments(segfile, rnd.vetoes) LOGGER.debug("Round %d vetoes written to %s" % (rnd.n, segfile)) rnd.files['VETO_SEGS'] = (segfile,) # write triggers trigfile = os.path.join( trigdir, '%s-HVETO_%%s_TRIGS_ROUND_%d-%d-%d.txt' % ( ifo, rnd.n, start, duration)) for tag, arr in zip( ['WINNER', 'VETOED', 'RAW'], [winner.events, vetoed, primary]): f = trigfile % tag arr.write(f, format='ascii', overwrite=True) LOGGER.debug("Round %d %s events written to %s" % (rnd.n, tag.lower(), f)) rnd.files[tag] = f # record times to omega scan if args.omega_scans: N = len(vetoed) ind = random.sample(range(0, N), min(args.omega_scans, N)) rnd.scans = vetoed[ind] LOGGER.debug("Collected %d events to omega scan:\n\n%s\n\n" % (len(rnd.scans), rnd.scans)) # -- make some plots -- pngname = os.path.join(plotdir, '%s-HVETO_%%s_ROUND_%d-%d-%d.png' % ( ifo, rnd.n, start, duration)) wname = texify(rnd.winner.name) beforel = 'Before\n[%d]' % len(before) afterl = 'After\n[%d]' % len(primary) vetoedl = 'Vetoed\n(primary)\n[%d]' % len(vetoed) beforeauxl = 'All\n[%d]' % len(beforeaux) usedl = 'Used\n(aux)\n[%d]' % len(winner.events) coincl = 'Coinc.\n[%d]' % len(coincs) title = '%s Hveto round %d' % (ifo, rnd.n) ptitle = '%s: primary impact' % title atitle = '%s: auxiliary use' % title subtitle = 'winner: %s [%d-%d]' % (wname, start, end) # before/after histogram png = pngname % 'HISTOGRAM' plot.before_after_histogram( png, before[scol], primary[scol], label1=beforel, label2=afterl, xlabel=slabel, title=ptitle, subtitle=subtitle) LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['HISTOGRAM']) rnd.plots.append(png) # snr versus time png = pngname % 'SNR_TIME' plot.veto_scatter( png, before, vetoed, x='time', y=scol, label1=beforel, label2=vetoedl, epoch=start, xlim=[start, end], ylabel=slabel, title=ptitle, subtitle=subtitle, legend_title="Primary:") LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['SNR_TIME']) rnd.plots.append(png) # snr versus frequency png = pngname % 'SNR_%s' % fcol.upper() plot.veto_scatter( png, before, vetoed, x=fcol, y=scol, label1=beforel, label2=vetoedl, xlabel=flabel, ylabel=slabel, xlim=pfreq, title=ptitle, subtitle=subtitle, legend_title="Primary:") LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['SNR']) rnd.plots.append(png) # frequency versus time coloured by SNR png = pngname % '%s_TIME' % fcol.upper() plot.veto_scatter( png, before, vetoed, x='time', y=fcol, color=scol, label1=None, label2=None, ylabel=flabel, clabel=slabel, clim=[3, 100], cmap='YlGnBu', epoch=start, xlim=[start, end], ylim=pfreq, title=ptitle, subtitle=subtitle) LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['TIME']) rnd.plots.append(png) # aux used versus frequency png = pngname % 'USED_SNR_TIME' plot.veto_scatter( png, winner.events, vetoed, x='time', y=[auxscol, scol], label1=usedl, label2=vetoedl, ylabel=slabel, epoch=start, xlim=[start, end], title=atitle, subtitle=subtitle) LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['USED_SNR_TIME']) rnd.plots.append(png) # snr 
versus time png = pngname % 'AUX_SNR_TIME' plot.veto_scatter( png, beforeaux, (winner.events, coincs), x='time', y=auxscol, label1=beforeauxl, label2=(usedl, coincl), epoch=start, xlim=[start, end], ylabel=auxslabel, title=atitle, subtitle=subtitle) LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_SNR_TIME']) rnd.plots.append(png) # snr versus frequency png = pngname % 'AUX_SNR_FREQUENCY' plot.veto_scatter( png, beforeaux, (winner.events, coincs), x=auxfcol, y=auxscol, label1=beforeauxl, label2=(usedl, coincl), xlabel=auxflabel, ylabel=auxslabel, title=atitle, subtitle=subtitle, legend_title="Aux:") LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_SNR_FREQUENCY']) rnd.plots.append(png) # frequency versus time coloured by SNR png = pngname % 'AUX_FREQUENCY_TIME' plot.veto_scatter( png, beforeaux, (winner.events, coincs), x='time', y=auxfcol, color=auxscol, label1=None, label2=[None, None], ylabel=auxflabel, clabel=auxslabel, clim=[3, 100], cmap='YlGnBu', epoch=start, xlim=[start, end], title=atitle, subtitle=subtitle) LOGGER.debug("Figure written to %s" % png) png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_FREQUENCY_TIME']) rnd.plots.append(png) # move to the next round rounds.append(rnd) rnd = core.HvetoRound(rnd.n + 1, pchannel, rank=scol, segments=rnd.segments-rnd.vetoes) # write file with all segments segfile = os.path.join( segdir, '%s-HVETO_SEGMENTS-%d-%d.h5' % (ifo, start, duration)) segments.write(segfile, overwrite=True) LOGGER.debug("Segment summary written to %s" % segfile) LOGGER.debug("Making summary figures...") # -- exit early if no rounds above threshold if not rounds: message = ("No rounds completed above threshold. Analysis stopped " "with %s achieving significance of %.2f" % (winner.name, winner.significance)) LOGGER.critical(message) message = message.replace( winner.name, cis_link(winner.name, class_='alert-link')) message += '<br>[T<sub>win</sub>: %ss, SNR: %s]' % ( winner.window, winner.snr) htmlv['context'] = 'warning' index = html.write_null_page(ifo, start, end, message, **htmlv) LOGGER.info("HTML report written to %s" % index) sys.exit(0) # -- plot all rounds impact pngname = os.path.join(plotdir, '%s-HVETO_%%s_ALL_ROUNDS-%d-%d.png' % ( ifo, start, duration)) plots = [] title = '%s Hveto all rounds' % args.ifo subtitle = '%d rounds | %d-%d' % (len(rounds), start, end) # before/after histogram png = pngname % 'HISTOGRAM' beforel = 'Before analysis [%d events]' % len(pevents[0]) afterl = 'After %d rounds [%d]' % (len(pevents) - 1, len(pevents[-1])) plot.before_after_histogram( png, pevents[0][scol], pevents[-1][scol], label1=beforel, label2=afterl, xlabel=slabel, title=title, subtitle=subtitle) png = FancyPlot(png, caption=plot.HEADER_CAPTION['HISTOGRAM']) plots.append(png) LOGGER.debug("Figure written to %s" % png) # efficiency/deadtime curve png = pngname % 'ROC' plot.hveto_roc(png, rounds, title=title, subtitle=subtitle) png = FancyPlot(png, caption=plot.HEADER_CAPTION['ROC']) plots.append(png) LOGGER.debug("Figure written to %s" % png) # frequency versus time png = pngname % '%s_TIME' % fcol.upper() labels = [str(r.n) for r in rounds] legtitle = 'Vetoed at\nround' plot.veto_scatter( png, pevents[0], pvetoed, label1='', label2=labels, title=title, subtitle=subtitle, ylabel=flabel, x='time', y=fcol, epoch=start, xlim=[start, end], legend_title=legtitle) png = FancyPlot(png, caption=plot.HEADER_CAPTION['TIME']) plots.append(png) LOGGER.debug("Figure written to %s" % 
png) # snr versus time png = pngname % 'SNR_TIME' plot.veto_scatter( png, pevents[0], pvetoed, label1='', label2=labels, title=title, subtitle=subtitle, ylabel=slabel, x='time', y=scol, epoch=start, xlim=[start, end], legend_title=legtitle) png = FancyPlot(png, caption=plot.HEADER_CAPTION['SNR_TIME']) plots.append(png) LOGGER.debug("Figure written to %s" % png) # -- write summary states to ASCII table and JSON json_ = { 'user': getuser(), 'host': getfqdn(), 'date': str(datetime.datetime.now()), 'configuration': inifile, 'ifo': ifo, 'gpsstart': start, 'gpsend': end, 'call': ' '.join(sys.argv), 'rounds': [], } with open('summary-stats.txt', 'w') as f: # print header print('#N winner window SNR significance nveto use-percentage ' 'efficiency deadtime cumulative-efficiency cumulative-deadtime', file=f) for r in rounds: # extract relevant statistics results = [ ('round', r.n), ('name', r.winner.name), ('window', r.winner.window), ('snr', r.winner.snr), ('significance', r.winner.significance), ('nveto', r.efficiency[0]), ('use-percentage', r.use_percentage[0] / r.use_percentage[1] * 100.), ('efficiency', r.efficiency[0] / r.efficiency[1] * 100.), ('deadtime', r.deadtime[0] / r.deadtime[1] * 100.), ('cumulative-efficiency', r.cum_efficiency[0] / r.cum_efficiency[1] * 100.), ('cumulative-deadtime', r.cum_deadtime[0] / r.cum_deadtime[1] * 100.), ] # write to ASCII print(' '.join(map(str, list(zip(*results))[1])), file=f) # write to JSON results.append(('files', r.files)) json_['rounds'].append(dict(results)) LOGGER.debug("Summary table written to %s" % f.name) with open('summary-stats.json', 'w') as f: json.dump(json_, f, sort_keys=True) LOGGER.debug("Summary JSON written to %s" % f.name) # -- generate workflow for omega scans if args.omega_scans: omegatimes = list(map(str, sorted(numpy.unique( [t['time'] for r in rounds for t in r.scans])))) LOGGER.debug("Collected %d times to omega scan" % len(omegatimes)) newtimes = [t for t in omegatimes if not os.path.exists(os.path.join(omegadir, str(t)))] LOGGER.debug("%d scans already complete or in progress, %d remaining" % (len(omegatimes) - len(newtimes), len(newtimes))) if len(newtimes) > 0: LOGGER.info('Creating workflow for omega scans') flags = batch.get_command_line_flags( ifo=ifo, ignore_state_flags=True) condorcmds = batch.get_condor_arguments( timeout=4, extra_commands=["request_disk='1G'"], gps=start) batch.generate_dag( newtimes, flags=flags, submit=True, outdir=omegadir, condor_commands=condorcmds) LOGGER.info('Launched {} omega scans to condor'.format( len(newtimes))) else: LOGGER.debug('Skipping omega scans') # -- write HTML and finish index = html.write_hveto_page( ifo, start, end, rounds, plots, winners=[r.winner.name for r in rounds], **htmlv) LOGGER.debug("HTML written to %s" % index) LOGGER.debug("Analysis completed in %d seconds" % (time.time() - JOBSTART)) LOGGER.info("-- Hveto complete --")
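
# Illustrative sketch (a hypothetical helper, not part of the original
# module): the percentages written to ``summary-stats.txt`` above are plain
# ratios of the (numerator, denominator) pairs stored on each round, e.g.
# efficiency = vetoed primary events / primary events before the veto, and
# deadtime = vetoed livetime / analysis livetime.  In miniature:


def _round_summary_percentages(efficiency, deadtime, use_percentage):
    """Convert a round's counter pairs into the percentages reported above.

    Each argument is a 2-tuple, e.g. ``efficiency=(12, 480)`` meaning 12 of
    480 primary events were vetoed (the numbers here are made up).
    """
    return {
        'efficiency': efficiency[0] / efficiency[1] * 100.,
        'deadtime': deadtime[0] / deadtime[1] * 100.,
        'use-percentage': use_percentage[0] / use_percentage[1] * 100.,
    }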
def main(args=None): parser = create_parser() args = parser.parse_args(args=args) # apply verbosity to logger args.verbose = max(5 - args.verbose, 0) logger.setLevel(args.verbose * 10) # validate command line arguments if args.ifo is None: parser.error("Cannot determine IFO prefix from sytem, " "please pass --ifo on the command line") if args.executable is None: parser.error("Cannot find omicron on path, please pass " "--executable on the command line") # validate processing options if all((args.skip_root_merge, args.skip_hdf5_merge, args.skip_ligolw_add, args.skip_gzip, not args.archive)): args.skip_postprocessing = True if args.archive: argsd = vars(args) for arg in [ 'skip-root-merge', 'skip-hdf5-merge', 'skip-ligolw-add', 'skip-gzip' ]: if argsd[arg.replace('-', '_')]: parser.error("Cannot use --%s with --archive" % arg) # check conflicts if args.gps is None and args.cache_file is not None: parser.error("Cannot use --cache-file in 'online' mode, " "please use --cache-file with --gps") # extract key variables ifo = args.ifo group = args.group online = args.gps is None # format file-tag as underscore-delimited upper-case string filetag = args.file_tag if filetag: filetag = re.sub(r'[:_\s-]', '_', filetag).rstrip('_').strip('_') if const.OMICRON_FILETAG.lower() in filetag.lower(): afiletag = filetag else: afiletag = '%s_%s' % (filetag, const.OMICRON_FILETAG.upper()) filetag = '_%s' % filetag else: filetag = '' afiletag = const.OMICRON_FILETAG.upper() logger.info("--- Welcome to the Omicron processor ---") # set up containers to keep track of files that we create here tempfiles = [] keepfiles = [] # check rescue against --dagman-option force if args.rescue and args.dagman_option.count('force') > 1: parser.error('--rescue is incompatible with --dagman-option force') elif args.rescue: args.dagman_option.pop(0) logger.info( "Running in RESCUE mode - the workflow will be " "re-generated in memory without any files being written", ) # set omicron version for future use omicronv = utils.get_omicron_version(args.executable) const.OMICRON_VERSION = str(omicronv) os.environ.setdefault('OMICRON_VERSION', str(omicronv)) logger.debug('Omicron version: %s' % omicronv) # -- parse configuration file and get parameters -------------------------- cp = configparser.ConfigParser() cp.read(args.config_file) # validate if not cp.has_section(group): raise configparser.NoSectionError(group) # get params channels = cp.get(group, 'channels').strip('\n').rstrip('\n').split('\n') try: # allow two-column 'channel samplerate' format channels, crates = zip(*[c.split(' ', 1) for c in channels]) except ValueError: crates = [] else: crates = set(crates) logger.debug("%d channels read" % len(channels)) for i in range(len(channels) - 1, -1, -1): # remove excluded channels c = channels[i] if c in args.exclude_channel: channels.pop(i) logger.debug(" removed %r" % c) logger.debug("%d channels to process" % len(channels)) cp.set(group, 'channels', '\n'.join(channels)) frametype = cp.get(group, 'frametype') logger.debug("frametype = %s" % frametype) chunkdur = cp.getint(group, 'chunk-duration') logger.debug("chunkdur = %s" % chunkdur) segdur = cp.getint(group, 'segment-duration') logger.debug("segdur = %s" % segdur) overlap = cp.getint(group, 'overlap-duration') logger.debug("overlap = %s" % overlap) padding = int(overlap / 2) logger.debug("padding = %s" % padding) try: frange = tuple(map(float, cp.get(group, 'frequency-range').split())) except configparser.NoOptionError as e: try: flow = cp.getfloat(group, 'flow') fhigh = 
cp.getfloat(group, 'flow') except configparser.NoOptionError: raise e frange = (flow, fhigh) logger.debug('frequencyrange = [%s, %s)' % tuple(frange)) try: sampling = cp.getfloat(group, 'sample-frequency') except configparser.NoOptionError: if len(crates) == 1: sampling = float(crates[0]) elif len(crates) > 1: raise ValueError( "No sample-frequency parameter given, and multiple " "sample frequencies parsed from channels list, " "cannot continue", ) else: sampling = None if sampling: logger.debug('samplingfrequency = %s' % sampling) # get state channel try: statechannel = cp.get(group, 'state-channel') except configparser.NoOptionError: statechannel = None else: try: statebits = list( map( float, cp.get(group, 'state-bits').split(','), )) except configparser.NoOptionError: statebits = [0] try: stateft = cp.get(group, 'state-frametype') except configparser.NoOptionError as e: e.args = ('%s, this must be specified if state-channel is given' % str(e), ) raise # get state flag (if given) try: stateflag = cp.get(group, 'state-flag') except configparser.NoOptionError: stateflag = None else: logger.debug("State flag = %s" % stateflag) if not statechannel: # map state flag to state channel try: statechannel, statebits, stateft = ( segments.STATE_CHANNEL[stateflag]) except KeyError as e: if online or args.no_segdb: # only raise if channel required e.args = ('Cannot map state flag %r to channel' % stateflag, ) raise else: pass if statechannel: logger.debug("State channel = %s" % statechannel) logger.debug("State bits = %s" % ', '.join(map(str, statebits))) logger.debug("State frametype = %s" % stateft) # parse padding for state segments if statechannel or stateflag: try: statepad = cp.get(group, 'state-padding') except configparser.NoOptionError: statepad = (0, 0) else: try: p = int(statepad) except ValueError: statepad = tuple(map(float, statepad.split(',', 1))) else: statepad = (p, p) logger.debug("State padding: %s" % str(statepad)) rundir = utils.get_output_path(args) # convert to omicron parameters format oconfig = parameters.OmicronParameters.from_channel_list_config( cp, group, version=omicronv) # and validate things oconfig.validate() # -- set directories ------------------------------------------------------ rundir.mkdir(exist_ok=True, parents=True) logger.info("Using run directory\n%s" % rundir) cachedir = rundir / "cache" condir = rundir / "condor" logdir = rundir / "logs" pardir = rundir / "parameters" trigdir = rundir / "triggers" for d in [cachedir, condir, logdir, pardir, trigdir]: d.mkdir(exist_ok=True) oconfig.set('OUTPUT', 'DIRECTORY', str(trigdir)) # -- check for an existing process ---------------------------------------- dagpath = condir / "{}.dag".format(DAG_TAG) # check dagman lock file running = condor.dag_is_running(dagpath) if running: msg = "Detected {} already running in {}".format( dagpath, rundir, ) if not args.reattach: raise RuntimeError(msg) logger.info("{}, will reattach".format(msg)) else: args.reattach = False # check dagman rescue files nrescue = len( list(condir.glob("{}.rescue[0-9][0-9][0-9]".format(dagpath.name), ))) if args.rescue and not nrescue: raise RuntimeError( "--rescue given but no rescue DAG files found for {}".format( dagpath, ), ) if nrescue and not args.rescue and "force" not in args.dagman_option: raise RuntimeError( "rescue DAGs found for {} but `--rescue` not given and " "`--dagman-option force` not given, cannot continue".format( dagpath, ), ) newdag = not args.rescue and not args.reattach # -- find run segment 
----------------------------------------------------- segfile = str(rundir / "segments.txt") keepfiles.append(segfile) if newdag and online: # get limit of available data (allowing for padding) end = data.get_latest_data_gps(ifo, frametype) - padding try: # start from where we got to last time start = segments.get_last_run_segment(segfile)[1] except IOError: # otherwise start with a sensible amount of data if args.use_dev_shm: # process one chunk logger.debug("No online segment record, starting with " "%s seconds" % chunkdur) start = end - chunkdur + padding else: # process the last 4000 seconds (arbitrarily) logger.debug("No online segment record, starting with " "4000 seconds") start = end - 4000 else: logger.debug("Online segment record recovered") elif online: start, end = segments.get_last_run_segment(segfile) else: start, end = args.gps duration = end - start datastart = start - padding dataend = end + padding dataduration = dataend - datastart logger.info("Processing segment determined as") logger.info(" %d %d" % (datastart, dataend)) logger.info("Duration = %d seconds" % dataduration) span = (start, end) # -- find segments and frame files ---------------------------------------- # minimum allowed duration is one full chunk minduration = 1 * chunkdur # validate span is long enough if dataduration < minduration and online: logger.info("Segment is too short (%d < %d), please try again later" % (duration, minduration)) clean_exit(0, tempfiles) elif dataduration < minduration: raise ValueError( "Segment [%d, %d) is too short (%d < %d), please " "extend the segment, or shorten the timing parameters." % (start, end, duration, chunkdur - padding * 2), ) # -- find run segments # get segments from state vector if (online and statechannel) or (statechannel and not stateflag) or (statechannel and args.no_segdb): logger.info("Finding segments for relevant state...") if statebits == "guardian": # use guardian segs = segments.get_guardian_segments( statechannel, stateft, datastart, dataend, pad=statepad, ) else: segs = segments.get_state_segments( statechannel, stateft, datastart, dataend, bits=statebits, pad=statepad, ) # get segments from segment database elif stateflag: logger.info("Querying segments for relevant state...") segs = segments.query_state_segments(stateflag, datastart, dataend, pad=statepad) # get segments from frame availability else: segs = segments.get_frame_segments(ifo, frametype, datastart, dataend) # print frame segments recovered if len(segs): logger.info("State/frame segments recovered as") for seg in segs: logger.info(" %d %d [%d]" % (seg[0], seg[1], abs(seg))) logger.info("Duration = %d seconds" % abs(segs)) # if running online, we want to avoid processing up to the extent of # available data, so that the next run doesn't get left with a segment that # is too short to process # There are a few reasons this might be # - the interferometer loses lock a short time after the end of this run # - a restart/other problem means that a frame is missing a short time # after the end of this run # so, work out whether we need to truncate: try: lastseg = segs[-1] except IndexError: truncate = False else: truncate = online and newdag and lastseg[1] == dataend # if final segment is shorter than two chunks, remove it entirely # so that it gets processed next time (when it will either a closed # segment, or long enough to process safely) if truncate and abs(lastseg) < chunkdur * 2: logger.info( "The final segment is too short, but ends at the limit of " "available data, presumably this 
is an active segment. It " "will be removed so that it can be processed properly later", ) segs = type(segs)(segs[:-1]) dataend = lastseg[0] # otherwise, we remove the final chunk (so that the next run has at # least that on which to operate), then truncate to an integer number # of chunks (so that # PSD estimation operates on a consistent amount # of data) elif truncate: logger.info("The final segment touches the limit of available data, " "the end chunk will be removed to guarantee that the next " "online run has enough data over which to operate") t, e = lastseg e -= chunkdur + padding # remove one chunk # now truncate to an integer number of chunks step = chunkdur while t + chunkdur <= e: t += step step = chunkdur - overlap segs[-1] = type(segs[-1])(lastseg[0], t) dataend = segs[-1][1] logger.info("This analysis will now run to %d" % dataend) # recalculate the processing segment dataspan = type(segs)([segments.Segment(datastart, dataend)]) # -- find the frames # find frames under /dev/shm (which creates a cache of temporary files) if args.cache_file: cache = read_cache(str(args.cache_file)) # only cache if we have state segments elif args.use_dev_shm and len(segs): cache = data.find_frames(ifo, frametype, datastart, dataend, on_gaps='warn', tmpdir=cachedir) # remove cached files at end of process tempfiles.extend(filter(lambda p: str(cachedir) in p, cache)) # find frames using datafind else: cache = data.find_frames(ifo, frametype, datastart, dataend, on_gaps='warn') # if not frames for an online run, panic if not online and len(cache) == 0: raise RuntimeError("No frames found for %s-%s" % (ifo[0], frametype)) # work out the segments of data available try: cachesegs = (segments.cache_segments(cache) & dataspan).coalesce() except TypeError: # empty cache cachesegs = type(dataspan)() alldata = False else: try: alldata = cachesegs[-1][1] >= dataspan[-1][1] except IndexError: # no data overlapping span alldata = False # write cache of frames (only if creating a new DAG) cachefile = cachedir / "frames.lcf" keepfiles.append(cachefile) if newdag: data.write_cache(cache, cachefile) oconfig.set('DATA', 'FFL', str(cachefile)) logger.info("Cache of %d frames written to\n%s" % (len(cache), cachefile)) # restrict analysis to available data (and warn about missing data) if segs - cachesegs: logger.warning("Not all state times are available in frames") segs = (cachesegs & segs).coalesce() # apply minimum duration requirement segs = type(segs)(s for s in segs if abs(s) >= segdur) # if all of the data are available, but no analysable segments were found # (i.e. IFO not in right state for all times), record segments.txt if newdag and len(segs) == 0 and online and alldata: logger.info( "No analysable segments found, but up-to-date data are " "available. 
A segments.txt file will be written so we don't " "have to search these data again", ) segments.write_segments(cachesegs, segfile) logger.info("Segments written to\n%s" % segfile) clean_exit(0, tempfiles) # otherwise not all data are available, so elif len(segs) == 0 and online: logger.info("No analysable segments found, please try again later") clean_exit(0, tempfiles) elif len(segs) == 0: raise RuntimeError("No analysable segments found") # and calculate trigger output segments trigsegs = type(segs)(type(s)(*s) for s in segs).contract(padding) # display segments logger.info("Final data segments selected as") for seg in segs: logger.info(" %d %d " % seg + "[%d]" % abs(seg)) logger.info("Duration = %d seconds" % abs(segs)) span = type(trigsegs)([trigsegs.extent()]) logger.info("This will output triggers for") for seg in trigsegs: logger.info(" %d %d " % seg + "[%d]" % abs(seg)) logger.info("Duration = %d seconds" % abs(trigsegs)) # -- config omicron config directory -------------------------------------- tempfiles.append(utils.astropy_config_path(rundir)) # -- make parameters files then generate the DAG -------------------------- fileformats = oconfig.output_formats() # generate a 'master' parameters.txt file for archival purposes if not newdag: # if not writing new dag, dump parameters.txt files to /tmp pardir = gettempdir() parfile, jobfiles = oconfig.write_distributed( pardir, nchannels=args.max_channels_per_job) logger.debug("Created master parameters file\n%s" % parfile) if newdag: keepfiles.append(parfile) # create dag dag = pipeline.CondorDAG(str(logdir / "{}.log".format(DAG_TAG))) dag.set_dag_file(str(dagpath.with_suffix(""))) # set up condor commands for all jobs condorcmds = { 'accounting_group': args.condor_accounting_group, 'accounting_group_user': args.condor_accounting_group_user } for cmd_ in args.condor_command: key, value = cmd_.split('=', 1) condorcmds[key.rstrip().lower()] = value.strip() # create omicron job reqmem = condorcmds.pop('request_memory', 1000) ojob = condor.OmicronProcessJob(args.universe, args.executable, subdir=condir, logdir=logdir, **condorcmds) ojob.add_condor_cmd('request_memory', reqmem) ojob.add_condor_cmd('+OmicronProcess', '"%s"' % group) # create post-processing job ppjob = condor.OmicronProcessJob(args.universe, find_executable('bash'), subdir=condir, logdir=logdir, tag='post-processing', **condorcmds) ppjob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group) ppjob.add_short_opt('e', '') ppnodes = [] rootmerge = find_executable('omicron-root-merge') hdf5merge = find_executable('omicron-hdf5-merge') ligolw_add = find_executable('ligolw_add') gzip = find_executable('gzip') # create node to remove files rmjob = condor.OmicronProcessJob(args.universe, str(condir / "post-process-rm.sh"), subdir=condir, logdir=logdir, tag='post-processing-rm', **condorcmds) rm = find_executable('rm') rmfiles = [] rmjob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group) if args.archive: archivejob = condor.OmicronProcessJob(args.universe, str(condir / "archive.sh"), subdir=condir, logdir=logdir, tag='archive', **condorcmds) archivejob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group) archivefiles = {} # loop over data segments for s, e in segs: # build trigger segment ts = s + padding te = e - padding td = te - ts # distribute segment across multiple nodes nodesegs = oconfig.distribute_segment(s, e, nperjob=args.max_chunks_per_job) omicronfiles = {} # build node for each parameter file for i, pf in enumerate(jobfiles): chanlist = jobfiles[pf] nodes = [] # loop 
over distributed segments for subseg in nodesegs: if not args.skip_omicron: # work out files for this job nodefiles = oconfig.output_files(*subseg) # build node node = pipeline.CondorDAGNode(ojob) node.set_category('omicron') node.set_retry(args.condor_retry) node.add_var_arg(str(subseg[0])) node.add_var_arg(str(subseg[1])) node.add_file_arg(pf) for chan in chanlist: for form, flist in nodefiles[chan].items(): # record file as output from this node for f in flist: node._CondorDAGNode__output_files.append(f) # record file as output for this channel try: omicronfiles[chan][form].extend(flist) except KeyError: try: omicronfiles[chan][form] = flist except KeyError: omicronfiles[chan] = {form: flist} dag.add_node(node) nodes.append(node) # post-process (one post-processing job per channel # per data segment) if not args.skip_postprocessing: script = condir / "post-process-{}-{}-{}.sh".format(i, s, e) ppnode = pipeline.CondorDAGNode(ppjob) ppnode.add_var_arg(str(script)) operations = [] # build post-processing nodes for each channel for c in chanlist: operations.append('\n# %s' % c) chandir = trigdir / c # work out filenames for coalesced files archpath = Path( io.get_archive_filename( c, ts, td, filetag=afiletag, ext='root', )) mergepath = chandir / archpath.name target = str(archpath.parent) # add ROOT operations if 'root' in fileformats: rootfiles = ' '.join(omicronfiles[c]['root']) for f in omicronfiles[c]['root']: ppnode._CondorDAGNode__input_files.append(f) if args.skip_root_merge or (len( omicronfiles[c]['root']) == 1): root = rootfiles else: root = str(mergepath) operations.append('%s %s %s --strict' % (rootmerge, rootfiles, root)) rmfiles.append(rootfiles) ppnode._CondorDAGNode__output_files.append(root) if args.archive: try: archivefiles[target].append(root) except KeyError: archivefiles[target] = [root] rmfiles.append(root) # add HDF5 operations if 'hdf5' in fileformats: hdf5files = ' '.join(omicronfiles[c]['hdf5']) for f in omicronfiles[c]['hdf5']: ppnode._CondorDAGNode__input_files.append(f) if args.skip_hdf5_merge or (len( omicronfiles[c]['hdf5']) == 1): hdf5 = hdf5files else: hdf5 = str(mergepath.with_suffix(".h5")) operations.append( '{cmd} {infiles} {outfile}'.format( cmd=hdf5merge, infiles=hdf5files, outfile=hdf5, ), ) rmfiles.append(hdf5files) ppnode._CondorDAGNode__output_files.append(hdf5) if args.archive: try: archivefiles[target].append(hdf5) except KeyError: archivefiles[target] = [hdf5] rmfiles.append(hdf5) # add LIGO_LW operations if 'xml' in fileformats: xmlfiles = ' '.join(omicronfiles[c]['xml']) for f in omicronfiles[c]['xml']: ppnode._CondorDAGNode__input_files.append(f) if (args.skip_ligolw_add or len(omicronfiles[c]['xml']) == 1): xml = xmlfiles else: xml = str(mergepath.with_suffix(".xml")) operations.append( '%s %s --ilwdchar-compat --output %s' % (ligolw_add, xmlfiles, xml), ) rmfiles.append(xmlfiles) ppnode._CondorDAGNode__output_files.append(xml) if not args.skip_gzip: operations.append( '%s --force --stdout %s > %s.gz' % (gzip, xml, xml)) rmfiles.append(xml) xml = str(mergepath.with_suffix(".xml.gz")) ppnode._CondorDAGNode__output_files.append(xml) if args.archive: try: archivefiles[target].append(xml) except KeyError: archivefiles[target] = [xml] rmfiles.append(xml) # add ASCII operations if 'txt' in fileformats: txtfiles = ' '.join(omicronfiles[c]['txt']) for f in omicronfiles[c]['txt']: ppnode._CondorDAGNode__input_files.append(f) if args.archive: try: archivefiles[target].append(txtfiles) except KeyError: archivefiles[target] = [txtfiles] 
rmfiles.append(txtfiles) ppnode.set_category('postprocessing') ppnode.set_retry(str(args.condor_retry)) if not args.skip_omicron: for node in nodes: ppnode.add_parent(node) dag.add_node(ppnode) ppnodes.append(ppnode) tempfiles.append(script) # write post-processing file if not args.rescue: with script.open("w") as f: # add header print('#!/bin/bash -e\n#', file=f) print("# omicron-process post-processing", file=f) print( '#\n# File created by\n# {}\n#'.format( ' '.join(sys.argv), ), file=f, ) print("# Group: %s" % group, file=f) print("# Segment: [%d, %d)" % (s, e), file=f) print("# Channels:\n#", file=f) for c in chanlist: print('# %s' % c, file=f) # add post-processing operations print('\n'.join(operations), file=f) if newdag: script.chmod(0o755) # set 'strict' option for Omicron # this is done after the nodes are written so that 'strict' is last in # the call ojob.add_arg('strict') # do all archiving last, once all post-processing has completed if args.archive: archivenode = pipeline.CondorDAGNode(archivejob) acache = {fmt: list() for fmt in fileformats} if newdag: # write shell script to seed archive with open(archivejob.get_executable(), 'w') as f: print('#!/bin/bash -e\n', file=f) for gpsdir, filelist in archivefiles.items(): for fn in filelist: archivenode._CondorDAGNode__input_files.append(fn) # write 'mv' op to script print("mkdir -p %s" % gpsdir, file=f) print("cp %s %s" % (' '.join(filelist), gpsdir), file=f) # record archived files in caches filenames = [ str(Path(gpsdir) / x.name) for x in map(Path, filelist) ] for fn in filenames: archivenode._CondorDAGNode__output_files.append(fn) for fmt, extensions in { 'xml': ('.xml.gz', '.xml'), 'root': '.root', 'hdf5': '.h5', 'txt': '.txt', }.items(): try: acache[fmt].extend( filter(lambda x: x.endswith(extensions), filenames)) except KeyError: # file format not used continue os.chmod(archivejob.get_executable(), 0o755) # write caches to disk for fmt, fcache in acache.items(): cachefile = cachedir / "omicron-{0}.lcf".format(fmt) data.write_cache(fcache, cachefile) logger.debug("{0} cache written to {1}".format(fmt, cachefile)) # add node to DAG for node in ppnodes: archivenode.add_parent(node) archivenode.set_retry(args.condor_retry) archivenode.set_category('archive') dag.add_node(archivenode) tempfiles.append(archivejob.get_executable()) # add rm job right at the end rmnode = pipeline.CondorDAGNode(rmjob) rmscript = rmjob.get_executable() with open(rmscript, 'w') as f: print('#!/bin/bash -e\n#', file=f) print("# omicron-process post-processing-rm", file=f) print('#\n# File created by\n# %s\n#' % ' '.join(sys.argv), file=f) print("# Group: %s" % group, file=f) print("# Segment: [%d, %d)" % (s, e), file=f) print("# Channels:\n#", file=f) for c in channels: print('# %s' % c, file=f) print('', file=f) for rmset in rmfiles: print('%s -f %s' % (rm, rmset), file=f) if newdag: os.chmod(rmscript, 0o755) tempfiles.append(rmscript) rmnode.set_category('postprocessing') if args.archive: # run this after archiving rmnode.add_parent(archivenode) else: # or just after post-processing if not archiving for node in ppnodes: rmnode.add_parent(node) dag.add_node(rmnode) # print DAG to file dagfile = Path(dag.get_dag_file()).resolve(strict=False) if args.rescue: logger.info( "In --rescue mode, this DAG has been reproduced in memory " "for safety, but will not be written to disk, the file is:", ) elif newdag: dag.write_sub_files() dag.write_dag() dag.write_script() with open(dagfile, 'a') as f: print("DOT", dagfile.with_suffix(".dot"), file=f) 
logger.info("Dag with %d nodes written to" % len(dag.get_nodes())) print(dagfile) if args.no_submit: if newdag: segments.write_segments(span, segfile) logger.info("Segments written to\n%s" % segfile) sys.exit(0) # -- submit the DAG and babysit ------------------------------------------- # submit DAG if args.rescue: logger.info("--- Submitting rescue DAG to condor ----") elif args.reattach: logger.info("--- Reattaching to existing DAG --------") else: logger.info("--- Submitting DAG to condor -----------") for i in range(args.submit_rescue_dag + 1): if args.reattach: # find ID of existing DAG dagid = int( condor.find_job(Owner=getuser(), OmicronDAGMan=group)['ClusterId']) logger.info("Found existing condor ID = %d" % dagid) else: # or submit DAG dagmanargs = set() if online: dagmanopts = {'-append': '+OmicronDAGMan=\"%s\"' % group} else: dagmanopts = {} for x in args.dagman_option: x = '-%s' % x try: key, val = x.split('=', 1) except ValueError: dagmanargs.add(x) else: dagmanopts[key] = val dagid = condor.submit_dag( str(dagfile), *list(dagmanargs), **dagmanopts, ) logger.info("Condor ID = %d" % dagid) # write segments now -- this means that online processing will # _always_ move on even if the workflow fails if i == 0: segments.write_segments(span, segfile) logger.info("Segments written to\n%s" % segfile) if 'force' in args.dagman_option: args.dagman_option.pop(args.dagman_option.index('force')) # monitor the dag logger.debug("----------------------------------------") logger.info("Monitoring DAG:") check_call([ "pycondor", "monitor", "--time", "5", "--length", "36", str(dagfile), ]) print() logger.debug("----------------------------------------") sleep(5) try: stat = condor.get_dag_status(dagid) except OSError as exc: # query failed logger.warning(str(exc)) stat = {} # log exitcode if "exitcode" not in stat: logger.warning("DAG has exited, status unknown") break if not stat["exitcode"]: logger.info("DAG has exited with status {}".format( stat.get("exitcode", "unknown"), )) break logger.critical( "DAG has exited with status {}".format(stat['exitcode']), ) # handle failure if i == args.submit_rescue_dag: raise RuntimeError("DAG has failed to complete %d times" % (args.submit_rescue_dag + 1)) else: rescue = condor.find_rescue_dag(str(dagfile)) logger.warning("Rescue DAG %s was generated" % rescue) # mark output and error files of condor nodes that passed to be deleted try: for node, files in condor.get_out_err_files(dagid, exitcode=0).items(): tempfiles.extend(files) except RuntimeError: pass # archive files stub = '%d-%d' % (start, end) for f in map(Path, ["{}.dagman.out".format(dagfile)] + keepfiles): archive = logdir / "{0[0]}.{1}.{0[1]}".format( f.name.split(".", 1), stub, ) if str(f) == str(segfile): shutil.copyfile(f, archive) else: f.rename(archive) logger.debug("Archived path\n{} --> {}".format(f, archive)) # clean up temporary files tempfiles.extend(trigdir.glob("ffconvert.*.ffl")) clean_tempfiles(tempfiles) # and exit logger.info("--- Processing complete ----------------")