Пример #1
0
    def test_read_write_cache(self):
        cache = self.make_cache()[0]
        with tempfile.NamedTemporaryFile() as f:
            io_cache.write_cache(cache, f)
            f.seek(0)

            # read from fileobj
            c2 = io_cache.read_cache(f)
            assert cache == c2

            # write with file name
            io_cache.write_cache(cache, f.name)

            # read from file name
            c3 = io_cache.read_cache(f.name)
            assert cache == c3
Пример #2
0
    def test_read_write_cache(self):
        cache = self.make_cache()[0]
        with tempfile.NamedTemporaryFile() as f:
            io_cache.write_cache(cache, f)
            f.seek(0)

            # read from fileobj
            c2 = io_cache.read_cache(f)
            assert cache == c2

            # write with file name
            io_cache.write_cache(cache, f.name)

            # read from file name
            c3 = io_cache.read_cache(f.name)
            assert cache == c3
Пример #3
0
def test_write_lal_cache(tmpdir):
    cache = [
        "/test/path/X-TEST-0-1.txt",
        "/test/path/X-TEST-2-3.txt",
    ]
    target = tmpdir.join("cache.lcf")
    utils.write_lal_cache(str(target), cache)
    assert read_cache(str(target)) == cache
Пример #4
0
def test_write_lal_cache(tmpdir):
    cache = [
        "/test/path/X-TEST-0-1.txt",
        "/test/path/X-TEST-2-3.txt",
    ]
    target = tmpdir.join("cache.lcf")
    utils.write_lal_cache(str(target), cache)
    assert read_cache(str(target)) == cache
Пример #5
0
def main(args=None):
    """Run the cache_events tool
    """
    parser = create_parser()
    args = parser.parse_args(args=args)

    ifo = args.ifo
    start = int(args.gpsstart)
    end = int(args.gpsend)
    duration = end - start

    LOGGER.info("-- Welcome to Hveto --")
    LOGGER.info("GPS start time: %d" % start)
    LOGGER.info("GPS end time: %d" % end)
    LOGGER.info("Interferometer: %s" % ifo)

    # -- initialisation -------------------------------

    # read configuration
    cp = config.HvetoConfigParser(ifo=args.ifo)
    cp.read(map(str, args.config_file))
    LOGGER.info("Parsed configuration file(s)")

    # format output directory
    outdir = args.output_directory
    outdir.mkdir(parents=True, exist_ok=True)
    LOGGER.info("Working directory: {}".format(outdir))
    trigdir = outdir / 'triggers'
    trigdir.mkdir(parents=True, exist_ok=True)

    # get segments
    aflag = cp.get('segments', 'analysis-flag')
    url = cp.get('segments', 'url')
    padding = cp.getfloats('segments', 'padding')
    if args.analysis_segments:
        segs_ = DataQualityDict.read(args.analysis_segments, gpstype=float)
        analysis = segs_[aflag]
        span = SegmentList([Segment(start, end)])
        analysis.active &= span
        analysis.known &= span
        analysis.coalesce()
        LOGGER.debug("Segments read from disk")
    else:
        analysis = DataQualityFlag.query(aflag, start, end, url=url)
        LOGGER.debug("Segments recovered from %s" % url)
    analysis.pad(*padding)
    livetime = int(abs(analysis.active))
    livetimepc = livetime / duration * 100.
    LOGGER.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime" %
                (len(analysis.active), aflag, livetime, livetimepc))

    snrs = cp.getfloats('hveto', 'snr-thresholds')
    minsnr = min(snrs)

    # -- utility methods ------------------------------

    def create_path(channel):
        ifo, name = channel.split(':', 1)
        name = name.replace('-', '_')
        return trigdir / "{}-{}-{}-{}.h5".format(ifo, name, start, duration)

    def read_and_cache_events(channel,
                              etg,
                              cache=None,
                              trigfind_kw={},
                              **read_kw):
        cfile = create_path(channel)
        # read existing cached triggers and work out new segments to query
        if args.append and cfile.is_file():
            previous = DataQualityFlag.read(
                str(cfile),
                path='segments',
                format='hdf5',
            ).coalesce()
            new = analysis - previous
        else:
            new = analysis.copy()
        # get cache of files
        if cache is None:
            cache = find_trigger_files(channel, etg, new.active, **trigfind_kw)
        else:
            cache = list(
                filter(
                    lambda e: new.active.intersects_segment(file_segment(e)),
                    cache,
                ))
        # restrict 'active' segments to when we have data
        try:
            new.active &= cache_segments(cache)
        except IndexError:
            new.active = type(new.active)()
        # find new triggers
        try:
            trigs = get_triggers(channel,
                                 etg,
                                 new.active,
                                 cache=cache,
                                 raw=True,
                                 **read_kw)
        # catch error and continue
        except ValueError as e:
            warnings.warn('%s: %s' % (type(e).__name__, str(e)))
        else:
            path = write_events(channel, trigs, new)
            try:
                return path, len(trigs)
            except TypeError:  # None
                return

    def write_events(channel, tab, segments):
        """Write events to file with a given filename
        """
        # get filename
        path = create_path(channel)
        h5f = h5py.File(str(path), 'a')

        # read existing table from file
        try:
            old = tab.read(h5f["triggers"], format="hdf5")
        except KeyError:
            pass
        else:
            tab = vstack(old, tab)

        # append event table
        tab.write(h5f, path="triggers", append=True, overwrite=True)

        # write segments
        try:
            oldsegs = DataQualityFlag.read(h5f, path="segments", format="hdf5")
        except KeyError:
            pass
        else:
            segments = oldsegs + segments
        segments.write(h5f, path="segments", append=True, overwrite=True)

        # write file to disk
        h5f.close()
        return path

    # -- load channels --------------------------------

    # get primary channel name
    pchannel = cp.get('primary', 'channel')

    # read auxiliary cache
    if args.auxiliary_cache is not None:
        acache = [e for c in args.auxiliary_cache for e in read_cache(str(c))]
    else:
        acache = None

    # load auxiliary channels
    auxetg = cp.get('auxiliary', 'trigger-generator')
    auxfreq = cp.getfloats('auxiliary', 'frequency-range')
    try:
        auxchannels = cp.get('auxiliary', 'channels').strip('\n').split('\n')
    except config.configparser.NoOptionError:
        auxchannels = find_auxiliary_channels(auxetg,
                                              start,
                                              ifo=args.ifo,
                                              cache=acache)

    # load unsafe channels list
    _unsafe = cp.get('safety', 'unsafe-channels')
    if os.path.isfile(_unsafe):  # from file
        unsafe = set()
        with open(_unsafe, 'rb') as f:
            for c in f.read().rstrip('\n').split('\n'):
                if c.startswith('%(IFO)s'):
                    unsafe.add(c.replace('%(IFO)s', ifo))
                elif not c.startswith('%s:' % ifo):
                    unsafe.add('%s:%s' % (ifo, c))
                else:
                    unsafe.add(c)
    else:  # or from line-seprated list
        unsafe = set(_unsafe.strip('\n').split('\n'))
    unsafe.add(pchannel)
    cp.set('safety', 'unsafe-channels', '\n'.join(sorted(unsafe)))
    LOGGER.debug("Read list of %d unsafe channels" % len(unsafe))

    # remove duplicates
    auxchannels = sorted(set(auxchannels))
    LOGGER.debug("Read list of %d auxiliary channels" % len(auxchannels))

    # remove unsafe channels
    nunsafe = 0
    for i in range(len(auxchannels) - 1, -1, -1):
        if auxchannels[i] in unsafe:
            LOGGER.warning("Auxiliary channel %r identified as unsafe and has "
                           "been removed" % auxchannels[i])
            auxchannels.pop(i)
            nunsafe += 1
    LOGGER.debug("%d auxiliary channels identified as unsafe" % nunsafe)
    naux = len(auxchannels)
    LOGGER.info("Identified %d auxiliary channels to process" % naux)

    # -- load primary triggers -------------------------

    LOGGER.info("Reading events for primary channel...")

    # read primary cache
    if args.primary_cache is not None:
        pcache = [e for c in args.primary_cache for e in read_cache(str(c))]
    else:
        pcache = None

    # get primary params
    petg = cp.get('primary', 'trigger-generator')
    psnr = cp.getfloat('primary', 'snr-threshold')
    pfreq = cp.getfloats('primary', 'frequency-range')
    preadkw = cp.getparams('primary', 'read-')
    ptrigfindkw = cp.getparams('primary', 'trigfind-')

    # load primary triggers
    out = read_and_cache_events(pchannel,
                                petg,
                                snr=psnr,
                                frange=pfreq,
                                cache=pcache,
                                trigfind_kw=ptrigfindkw,
                                **preadkw)
    try:
        e, n = out
    except TypeError:
        e = None
        n = 0
    if n:
        LOGGER.info("Cached %d new events for %s" % (n, pchannel))
    elif args.append and e.is_file():
        LOGGER.info("Cached 0 new events for %s" % pchannel)
    else:
        message = "No events found for %r in %d seconds of livetime" % (
            pchannel, livetime)
        LOGGER.critical(message)

    # write primary to local cache
    pname = trigdir / '{}-HVETO_PRIMARY_CACHE-{}-{}.lcf'.format(
        ifo,
        start,
        duration,
    )
    write_lal_cache(str(pname), [e])
    LOGGER.info('Primary cache written to {}'.format(pname))

    # -- load auxiliary triggers -----------------------

    LOGGER.info("Reading triggers for aux channels...")
    counter = multiprocessing.Value('i', 0)

    areadkw = cp.getparams('auxiliary', 'read-')
    atrigfindkw = cp.getparams('auxiliary', 'trigfind-')

    def read_and_write_aux_triggers(channel):
        if acache is None:
            auxcache = None
        else:
            ifo, name = channel.split(':')
            match = "{}-{}".format(ifo, name.replace('-', '_'))
            auxcache = [e for e in acache if Path(e).name.startswith(match)]

        out = read_and_cache_events(channel,
                                    auxetg,
                                    cache=auxcache,
                                    snr=minsnr,
                                    frange=auxfreq,
                                    trigfind_kw=atrigfindkw,
                                    **areadkw)
        try:
            e, n = out
        except TypeError:
            e = None
            n = 0
        # log result of load
        with counter.get_lock():
            counter.value += 1
            tag = '[%d/%d]' % (counter.value, naux)
            if e is None:  # something went wrong
                LOGGER.critical("    %s Failed to read events for %s" %
                                (tag, channel))
            else:  # either read events or nothing new
                LOGGER.debug("    %s Cached %d new events for %s" %
                             (tag, n, channel))
        return e

    # map with multiprocessing
    if args.nproc > 1:
        pool = multiprocessing.Pool(processes=args.nproc)
        results = pool.map(read_and_write_aux_triggers, auxchannels)
        pool.close()
    # map without multiprocessing
    else:
        results = map(read_and_write_aux_triggers, auxchannels)

    acache = [x for x in results if x is not None]
    aname = trigdir / '{}-HVETO_AUXILIARY_CACHE-{}-{}.lcf'.format(
        ifo,
        start,
        duration,
    )
    write_lal_cache(str(aname), acache)
    LOGGER.info('Auxiliary cache written to {}'.format(aname))

    # -- finish ----------------------------------------

    LOGGER.info('Done, you can use these cache files in an hveto analysis by '
                'passing the following arguments:\n\n--primary-cache {} '
                '--auxiliary-cache {}\n'.format(pname, aname))
Пример #6
0
def main(args=None):
    """Run the hveto command-line interface
    """
    # declare global variables
    # this is needed for multiprocessing utilities
    global acache, analysis, areadkw, atrigfindkw, auxiliary, auxetg
    global auxfreq, counter, livetime, minsnr, naux, pchannel, primary
    global rnd, snrs, windows

    # parse command-line
    parser = create_parser()
    args = parser.parse_args(args=args)
    ifo = args.ifo
    start = int(args.gpsstart)
    end = int(args.gpsend)
    duration = end - start

    # log startup
    LOGGER.info("-- Welcome to Hveto --")
    LOGGER.info("GPS start time: %d" % start)
    LOGGER.info("GPS end time: %d" % end)
    LOGGER.info("Interferometer: %s" % ifo)

    # -- initialisation -------------------------

    # read configuration
    cp = config.HvetoConfigParser(ifo=ifo)
    cp.read(args.config_file)
    LOGGER.info("Parsed configuration file(s)")

    # format output directory
    outdir = _abs_path(args.output_directory)
    if not os.path.isdir(outdir):
        os.makedirs(outdir)
    os.chdir(outdir)
    LOGGER.info("Working directory: %s" % outdir)
    segdir = 'segments'
    plotdir = 'plots'
    trigdir = 'triggers'
    omegadir = 'scans'
    for d in [segdir, plotdir, trigdir, omegadir]:
        if not os.path.isdir(d):
            os.makedirs(d)

    # prepare html variables
    htmlv = {
        'title': '%s Hveto | %d-%d' % (ifo, start, end),
        'config': None,
        'prog': PROG,
        'context': ifo.lower(),
    }

    # get segments
    aflag = cp.get('segments', 'analysis-flag')
    url = cp.get('segments', 'url')
    padding = tuple(cp.getfloats('segments', 'padding'))
    if args.analysis_segments:
        segs_ = DataQualityDict.read(args.analysis_segments, gpstype=float)
        analysis = segs_[aflag]
        span = SegmentList([Segment(start, end)])
        analysis.active &= span
        analysis.known &= span
        analysis.coalesce()
        LOGGER.debug("Segments read from disk")
    else:
        analysis = DataQualityFlag.query(aflag, start, end, url=url)
        LOGGER.debug("Segments recovered from %s" % url)
    if padding != (0, 0):
        mindur = padding[0] - padding[1]
        analysis.active = type(analysis.active)([s for s in analysis.active if
                                                 abs(s) >= mindur])
        analysis.pad(*padding, inplace=True)
        LOGGER.debug("Padding %s applied" % str(padding))
    livetime = int(abs(analysis.active))
    livetimepc = livetime / duration * 100.
    LOGGER.info("Retrieved %d segments for %s with %ss (%.2f%%) livetime"
                % (len(analysis.active), aflag, livetime, livetimepc))

    # apply vetoes from veto-definer file
    try:
        vetofile = cp.get('segments', 'veto-definer-file')
    except configparser.NoOptionError:
        vetofile = None
    else:
        try:
            categories = cp.getfloats('segments', 'veto-definer-categories')
        except configparser.NoOptionError:
            categories = None
        # read file
        vdf = read_veto_definer_file(vetofile, start=start, end=end, ifo=ifo)
        LOGGER.debug("Read veto-definer file from %s" % vetofile)
        # get vetoes from segdb
        vdf.populate(source=url, segments=analysis.active, on_error='warn')
        # coalesce flags from chosen categories
        vetoes = DataQualityFlag('%s:VDF-VETOES:1' % ifo)
        nflags = 0
        for flag in vdf:
            if not categories or vdf[flag].category in categories:
                vetoes += vdf[flag]
                nflags += 1
        try:
            deadtime = int(abs(vetoes.active)) / int(abs(vetoes.known)) * 100
        except ZeroDivisionError:
            deadtime = 0
        LOGGER.debug("Coalesced %ss (%.2f%%) of deadtime from %d veto flags"
                     % (abs(vetoes.active), deadtime, nflags))
        # apply to analysis segments
        analysis -= vetoes
        LOGGER.debug("Applied vetoes from veto-definer file")
        livetime = int(abs(analysis.active))
        livetimepc = livetime / duration * 100.
        LOGGER.info("%ss (%.2f%%) livetime remaining after vetoes"
                    % (livetime, livetimepc))

    snrs = cp.getfloats('hveto', 'snr-thresholds')
    minsnr = min(snrs)
    windows = cp.getfloats('hveto', 'time-windows')

    # record all segments
    segments = DataQualityDict()
    segments[analysis.name] = analysis

    # -- load channels --------------------------

    # get primary channel name
    pchannel = cp.get('primary', 'channel')

    # read auxiliary cache
    if args.auxiliary_cache is not None:
        acache = read_cache(args.auxiliary_cache)
    else:
        acache = None

    # load auxiliary channels
    auxetg = cp.get('auxiliary', 'trigger-generator')
    auxfreq = cp.getfloats('auxiliary', 'frequency-range')
    try:
        auxchannels = cp.get('auxiliary', 'channels').strip('\n').split('\n')
    except config.configparser.NoOptionError:
        auxchannels = find_auxiliary_channels(auxetg, (start, end), ifo=ifo,
                                              cache=acache)
        cp.set('auxiliary', 'channels', '\n'.join(auxchannels))
        LOGGER.debug("Auto-discovered %d "
                     "auxiliary channels" % len(auxchannels))
    else:
        auxchannels = sorted(set(auxchannels))
        LOGGER.debug("Read list of %d auxiliary channels" % len(auxchannels))

    # load unsafe channels list
    _unsafe = cp.get('safety', 'unsafe-channels')
    if os.path.isfile(_unsafe):  # from file
        unsafe = set()
        with open(_unsafe, 'rb') as f:
            for c in f.read().rstrip('\n').split('\n'):
                if c.startswith('%(IFO)s'):
                    unsafe.add(c.replace('%(IFO)s', ifo))
                elif not c.startswith('%s:' % ifo):
                    unsafe.add('%s:%s' % (ifo, c))
                else:
                    unsafe.add(c)
    else:  # or from line-seprated list
        unsafe = set(_unsafe.strip('\n').split('\n'))
    unsafe.add(pchannel)
    cp.set('safety', 'unsafe-channels', '\n'.join(sorted(unsafe)))
    LOGGER.debug("Read list of %d unsafe channels" % len(unsafe))

    # remove unsafe channels
    nunsafe = 0
    for i in range(len(auxchannels) - 1, -1, -1):
        if auxchannels[i] in unsafe:
            LOGGER.warning("Auxiliary channel %r identified as unsafe and has "
                           "been removed" % auxchannels[i])
            auxchannels.pop(i)
            nunsafe += 1
    LOGGER.debug("%d auxiliary channels identified as unsafe" % nunsafe)
    naux = len(auxchannels)
    LOGGER.info("Identified %d auxiliary channels to process" % naux)

    # record INI file in output HTML directory
    inifile = '%s-HVETO_CONFIGURATION-%d-%d.ini' % (ifo, start, duration)
    if os.path.isfile(inifile) and any(
            os.path.samefile(inifile, x) for x in args.config_file):
        LOGGER.debug("Cannot write INI file to %s, file was given as input")
    else:
        with open(inifile, 'w') as f:
            cp.write(f)
        LOGGER.info("Configuration recorded as %s" % inifile)
    htmlv['config'] = inifile

    # -- load primary triggers ------------------

    # read primary cache
    if args.primary_cache is not None:
        pcache = read_cache(args.primary_cache)
    else:
        pcache = None

    # load primary triggers
    petg = cp.get('primary', 'trigger-generator')
    psnr = cp.getfloat('primary', 'snr-threshold')
    pfreq = cp.getfloats('primary', 'frequency-range')
    preadkw = cp.getparams('primary', 'read-')
    if pcache is not None:  # auto-detect the file format
        LOGGER.debug('Unsetting the primary trigger file format')
        preadkw['format'] = None
        preadkw['path'] = 'triggers'
    ptrigfindkw = cp.getparams('primary', 'trigfind-')
    primary = get_triggers(pchannel, petg, analysis.active, snr=psnr,
                           frange=pfreq, cache=pcache, nproc=args.nproc,
                           trigfind_kwargs=ptrigfindkw, **preadkw)
    fcol, scol = primary.dtype.names[1:3]

    if len(primary):
        LOGGER.info("Read %d events for %s" % (len(primary), pchannel))
    else:
        message = "No events found for %r in %d seconds of livetime" % (
           pchannel, livetime)
        LOGGER.critical(message)

    # cluster primary triggers
    clusterkwargs = cp.getparams('primary', 'cluster-')
    if clusterkwargs:
        primary = primary.cluster(**clusterkwargs)
        LOGGER.info("%d primary events remain after clustering over %s" %
                    (len(primary), clusterkwargs['rank']))

    # -- bail out early -------------------------
    # the bail out is done here so that we can at least generate the eventual
    # configuration file, mainly for HTML purposes

    # no segments
    if livetime == 0:
        message = ("No active segments found for analysis flag %r in interval "
                   "[%d, %d)" % (aflag, start, end))
        LOGGER.critical(message)
        htmlv['context'] = 'info'
        index = html.write_null_page(ifo, start, end, message, **htmlv)
        LOGGER.info("HTML report written to %s" % index)
        sys.exit(0)

    # no primary triggers
    if len(primary) == 0:
        htmlv['context'] = 'danger'
        index = html.write_null_page(ifo, start, end, message, **htmlv)
        LOGGER.info("HTML report written to %s" % index)
        sys.exit(0)

    # otherwise write all primary triggers to ASCII
    trigfile = os.path.join(
        trigdir,
        '%s-HVETO_RAW_TRIGS_ROUND_0-%d-%d.txt' % (ifo, start, duration),
    )
    primary.write(trigfile, format='ascii', overwrite=True)

    # -- load auxiliary triggers ----------------

    LOGGER.info("Reading triggers for aux channels...")
    counter = multiprocessing.Value('i', 0)

    areadkw = cp.getparams('auxiliary', 'read-')
    if acache is not None:  # auto-detect the file format
        LOGGER.debug('Unsetting the auxiliary trigger file format')
        areadkw['format'] = None
        areadkw['path'] = 'triggers'
    atrigfindkw = cp.getparams('auxiliary', 'trigfind-')

    # map with multiprocessing
    if args.nproc > 1:
        pool = multiprocessing.Pool(processes=args.nproc)
        results = pool.map(_get_aux_triggers, auxchannels)
        pool.close()
    # map without multiprocessing
    else:
        results = map(_get_aux_triggers, auxchannels)

    LOGGER.info("All aux events loaded")

    auxiliary = dict(x for x in results if x is not None)
    auxchannels = sorted(auxiliary.keys())
    chanfile = '%s-HVETO_CHANNEL_LIST-%d-%d.txt' % (ifo, start, duration)
    with open(chanfile, 'w') as f:
        for chan in auxchannels:
            print(chan, file=f)
    LOGGER.info("Recorded list of valid auxiliary channels in %s" % chanfile)

    # -- execute hveto analysis -----------------

    minsig = cp.getfloat('hveto', 'minimum-significance')

    pevents = [primary]
    pvetoed = []

    auxfcol, auxscol = auxiliary[auxchannels[0]].dtype.names[1:3]
    slabel = plot.get_column_label(scol)
    flabel = plot.get_column_label(fcol)
    auxslabel = plot.get_column_label(auxscol)
    auxflabel = plot.get_column_label(auxfcol)

    rounds = []
    rnd = core.HvetoRound(1, pchannel, rank=scol)
    rnd.segments = analysis.active

    while True:
        LOGGER.info("-- Processing round %d --" % rnd.n)

        # write segments for this round
        segfile = os.path.join(
            segdir, '%s-HVETO_ANALYSIS_SEGS_ROUND_%d-%d-%d.txt'
                    % (ifo, rnd.n, start, duration))
        write_ascii_segments(segfile, rnd.segments)

        # calculate significances for this round
        if args.nproc > 1:  # multiprocessing
            # separate channel list into chunks and process each chunk
            pool = multiprocessing.Pool(
                processes=min(args.nproc, len(auxiliary.keys())))
            chunks = utils.channel_groups(list(auxiliary.keys()), args.nproc)
            results = pool.map(_find_max_significance, chunks)
            pool.close()
            winners, sigsets = zip(*results)
            # find winner of chunk winners
            winner = sorted(winners, key=lambda w: w.significance)[-1]
            # flatten sets of significances into one list
            newsignificances = sigsets[0]
            for subdict in sigsets[1:]:
                newsignificances.update(subdict)
        else:  # single process
            winner, newsignificances = core.find_max_significance(
                primary, auxiliary, pchannel, snrs, windows, rnd.livetime)

        LOGGER.info("Round %d winner: %s" % (rnd.n, winner.name))

        # plot significance drop here for the last round
        #   only now do we actually have the new data to
        #   calculate significance drop
        if rnd.n > 1:
            svg = (pngname % 'SIG_DROP').replace('.png', '.svg')  # noqa: F821
            plot.significance_drop(
                svg, oldsignificances, newsignificances,  # noqa: F821
                title=' | '.join([title, subtitle]),  # noqa: F821
                bbox_inches='tight')
            LOGGER.debug("Figure written to %s" % svg)
            svg = FancyPlot(svg, caption=plot.ROUND_CAPTION['SIG_DROP'])
            rounds[-1].plots.append(svg)
        oldsignificances = newsignificances  # noqa: F841

        # break out of the loop if the significance is below stopping point
        if winner.significance < minsig:
            LOGGER.info("Maximum signifiance below stopping point")
            LOGGER.debug("    (%.2f < %.2f)" % (winner.significance, minsig))
            LOGGER.info("-- Rounds complete! --")
            break

        # work out the vetoes for this round
        allaux = auxiliary[winner.name][
            auxiliary[winner.name][auxscol] >= winner.snr]
        winner.events = allaux
        coincs = allaux[core.find_coincidences(allaux['time'], primary['time'],
                                               dt=winner.window)]
        rnd.vetoes = winner.get_segments(allaux['time'])
        flag = DataQualityFlag(
            '%s:HVT-ROUND_%d:1' % (ifo, rnd.n), active=rnd.vetoes,
            known=rnd.segments,
            description="winner=%s, window=%s, snr=%s" % (
                winner.name, winner.window, winner.snr))
        segments[flag.name] = flag
        LOGGER.debug("Generated veto segments for round %d" % rnd.n)

        # link events before veto for plotting
        before = primary
        beforeaux = auxiliary[winner.name]

        # apply vetoes to primary
        primary, vetoed = core.veto(primary, rnd.vetoes)
        pevents.append(primary)
        pvetoed.append(vetoed)
        LOGGER.debug("Applied vetoes to primary")

        # record results
        rnd.winner = winner
        rnd.efficiency = (len(vetoed), len(primary) + len(vetoed))
        rnd.use_percentage = (len(coincs), len(winner.events))
        if rnd.n > 1:
            rnd.cum_efficiency = (
                len(vetoed) + rounds[-1].cum_efficiency[0],
                rounds[0].efficiency[1])
            rnd.cum_deadtime = (
                rnd.deadtime[0] + rounds[-1].cum_deadtime[0],
                livetime)
        else:
            rnd.cum_efficiency = rnd.efficiency
            rnd.cum_deadtime = rnd.deadtime

        # apply vetoes to auxiliary
        if args.nproc > 1:  # multiprocess
            # separate channel list into chunks and process each chunk
            pool = multiprocessing.Pool(
                processes=min(args.nproc, len(auxiliary.keys())))
            chunks = utils.channel_groups(list(auxiliary.keys()), args.nproc)
            results = pool.map(_veto, chunks)
            pool.close()
            auxiliary = results[0]
            for subdict in results[1:]:
                auxiliary.update(subdict)
        else:  # single process
            auxiliary = core.veto_all(auxiliary, rnd.vetoes)
        LOGGER.debug("Applied vetoes to auxiliary channels")

        # log results
        LOGGER.info("""Results for round %d:\n\n
    winner :          %s
    significance :    %s
    mu :              %s
    snr :             %s
    dt :              %s
    use_percentage :  %s
    efficiency :      %s
    deadtime :        %s
    cum. efficiency : %s
    cum. deadtime :   %s\n\n""" % (
            rnd.n, rnd.winner.name, rnd.winner.significance,
            rnd.winner.mu, rnd.winner.snr, rnd.winner.window,
            rnd.use_percentage, rnd.efficiency, rnd.deadtime,
            rnd.cum_efficiency, rnd.cum_deadtime))

        # write segments
        segfile = os.path.join(
            segdir,
            '%s-HVETO_VETO_SEGS_ROUND_%d-%d-%d.txt' % (
                ifo, rnd.n, start, duration))
        write_ascii_segments(segfile, rnd.vetoes)
        LOGGER.debug("Round %d vetoes written to %s" % (rnd.n, segfile))
        rnd.files['VETO_SEGS'] = (segfile,)
        # write triggers
        trigfile = os.path.join(
            trigdir,
            '%s-HVETO_%%s_TRIGS_ROUND_%d-%d-%d.txt' % (
                ifo, rnd.n, start, duration))
        for tag, arr in zip(
                ['WINNER', 'VETOED', 'RAW'],
                [winner.events, vetoed, primary]):
            f = trigfile % tag
            arr.write(f, format='ascii', overwrite=True)
            LOGGER.debug("Round %d %s events written to %s"
                         % (rnd.n, tag.lower(), f))
            rnd.files[tag] = f

        # record times to omega scan
        if args.omega_scans:
            N = len(vetoed)
            ind = random.sample(range(0, N), min(args.omega_scans, N))
            rnd.scans = vetoed[ind]
            LOGGER.debug("Collected %d events to omega scan:\n\n%s\n\n"
                         % (len(rnd.scans), rnd.scans))

        # -- make some plots --

        pngname = os.path.join(plotdir, '%s-HVETO_%%s_ROUND_%d-%d-%d.png' % (
            ifo, rnd.n, start, duration))
        wname = texify(rnd.winner.name)
        beforel = 'Before\n[%d]' % len(before)
        afterl = 'After\n[%d]' % len(primary)
        vetoedl = 'Vetoed\n(primary)\n[%d]' % len(vetoed)
        beforeauxl = 'All\n[%d]' % len(beforeaux)
        usedl = 'Used\n(aux)\n[%d]' % len(winner.events)
        coincl = 'Coinc.\n[%d]' % len(coincs)
        title = '%s Hveto round %d' % (ifo, rnd.n)
        ptitle = '%s: primary impact' % title
        atitle = '%s: auxiliary use' % title
        subtitle = 'winner: %s [%d-%d]' % (wname, start, end)

        # before/after histogram
        png = pngname % 'HISTOGRAM'
        plot.before_after_histogram(
            png, before[scol], primary[scol],
            label1=beforel, label2=afterl, xlabel=slabel,
            title=ptitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['HISTOGRAM'])
        rnd.plots.append(png)

        # snr versus time
        png = pngname % 'SNR_TIME'
        plot.veto_scatter(
            png, before, vetoed, x='time', y=scol, label1=beforel,
            label2=vetoedl, epoch=start, xlim=[start, end], ylabel=slabel,
            title=ptitle, subtitle=subtitle, legend_title="Primary:")
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['SNR_TIME'])
        rnd.plots.append(png)

        # snr versus frequency
        png = pngname % 'SNR_%s' % fcol.upper()
        plot.veto_scatter(
            png, before, vetoed, x=fcol, y=scol, label1=beforel,
            label2=vetoedl, xlabel=flabel, ylabel=slabel, xlim=pfreq,
            title=ptitle, subtitle=subtitle, legend_title="Primary:")
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['SNR'])
        rnd.plots.append(png)

        # frequency versus time coloured by SNR
        png = pngname % '%s_TIME' % fcol.upper()
        plot.veto_scatter(
            png, before, vetoed, x='time', y=fcol, color=scol,
            label1=None, label2=None, ylabel=flabel,
            clabel=slabel, clim=[3, 100], cmap='YlGnBu',
            epoch=start, xlim=[start, end], ylim=pfreq,
            title=ptitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['TIME'])
        rnd.plots.append(png)

        # aux used versus frequency
        png = pngname % 'USED_SNR_TIME'
        plot.veto_scatter(
            png, winner.events, vetoed, x='time', y=[auxscol, scol],
            label1=usedl, label2=vetoedl, ylabel=slabel, epoch=start,
            xlim=[start, end], title=atitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['USED_SNR_TIME'])
        rnd.plots.append(png)

        # snr versus time
        png = pngname % 'AUX_SNR_TIME'
        plot.veto_scatter(
            png, beforeaux, (winner.events, coincs), x='time', y=auxscol,
            label1=beforeauxl, label2=(usedl, coincl), epoch=start,
            xlim=[start, end], ylabel=auxslabel, title=atitle,
            subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_SNR_TIME'])
        rnd.plots.append(png)

        # snr versus frequency
        png = pngname % 'AUX_SNR_FREQUENCY'
        plot.veto_scatter(
            png, beforeaux, (winner.events, coincs), x=auxfcol, y=auxscol,
            label1=beforeauxl, label2=(usedl, coincl), xlabel=auxflabel,
            ylabel=auxslabel, title=atitle, subtitle=subtitle,
            legend_title="Aux:")
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_SNR_FREQUENCY'])
        rnd.plots.append(png)

        # frequency versus time coloured by SNR
        png = pngname % 'AUX_FREQUENCY_TIME'
        plot.veto_scatter(
            png, beforeaux, (winner.events, coincs), x='time', y=auxfcol,
            color=auxscol, label1=None, label2=[None, None], ylabel=auxflabel,
            clabel=auxslabel, clim=[3, 100], cmap='YlGnBu', epoch=start,
            xlim=[start, end], title=atitle, subtitle=subtitle)
        LOGGER.debug("Figure written to %s" % png)
        png = FancyPlot(png, caption=plot.ROUND_CAPTION['AUX_FREQUENCY_TIME'])
        rnd.plots.append(png)

        # move to the next round
        rounds.append(rnd)
        rnd = core.HvetoRound(rnd.n + 1, pchannel, rank=scol,
                              segments=rnd.segments-rnd.vetoes)

    # write file with all segments
    segfile = os.path.join(
        segdir, '%s-HVETO_SEGMENTS-%d-%d.h5' % (ifo, start, duration))
    segments.write(segfile, overwrite=True)
    LOGGER.debug("Segment summary written to %s" % segfile)

    LOGGER.debug("Making summary figures...")

    # -- exit early if no rounds above threshold

    if not rounds:
        message = ("No rounds completed above threshold. Analysis stopped "
                   "with %s achieving significance of %.2f"
                   % (winner.name, winner.significance))
        LOGGER.critical(message)
        message = message.replace(
            winner.name, cis_link(winner.name, class_='alert-link'))
        message += '<br>[T<sub>win</sub>: %ss, SNR: %s]' % (
            winner.window, winner.snr)
        htmlv['context'] = 'warning'
        index = html.write_null_page(ifo, start, end, message, **htmlv)
        LOGGER.info("HTML report written to %s" % index)
        sys.exit(0)

    # -- plot all rounds impact
    pngname = os.path.join(plotdir, '%s-HVETO_%%s_ALL_ROUNDS-%d-%d.png' % (
        ifo, start, duration))
    plots = []
    title = '%s Hveto all rounds' % args.ifo
    subtitle = '%d rounds | %d-%d' % (len(rounds), start, end)

    # before/after histogram
    png = pngname % 'HISTOGRAM'
    beforel = 'Before analysis [%d events]' % len(pevents[0])
    afterl = 'After %d rounds [%d]' % (len(pevents) - 1, len(pevents[-1]))
    plot.before_after_histogram(
        png, pevents[0][scol], pevents[-1][scol],
        label1=beforel, label2=afterl, xlabel=slabel,
        title=title, subtitle=subtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['HISTOGRAM'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # efficiency/deadtime curve
    png = pngname % 'ROC'
    plot.hveto_roc(png, rounds, title=title, subtitle=subtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['ROC'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # frequency versus time
    png = pngname % '%s_TIME' % fcol.upper()
    labels = [str(r.n) for r in rounds]
    legtitle = 'Vetoed at\nround'
    plot.veto_scatter(
        png, pevents[0], pvetoed,
        label1='', label2=labels, title=title,
        subtitle=subtitle, ylabel=flabel, x='time', y=fcol,
        epoch=start, xlim=[start, end], legend_title=legtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['TIME'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # snr versus time
    png = pngname % 'SNR_TIME'
    plot.veto_scatter(
        png, pevents[0], pvetoed, label1='', label2=labels, title=title,
        subtitle=subtitle, ylabel=slabel, x='time', y=scol,
        epoch=start, xlim=[start, end], legend_title=legtitle)
    png = FancyPlot(png, caption=plot.HEADER_CAPTION['SNR_TIME'])
    plots.append(png)
    LOGGER.debug("Figure written to %s" % png)

    # -- write summary states to ASCII table and JSON
    json_ = {
        'user': getuser(),
        'host': getfqdn(),
        'date': str(datetime.datetime.now()),
        'configuration': inifile,
        'ifo': ifo,
        'gpsstart': start,
        'gpsend': end,
        'call': ' '.join(sys.argv),
        'rounds': [],
    }
    with open('summary-stats.txt', 'w') as f:
        # print header
        print('#N winner window SNR significance nveto use-percentage '
              'efficiency deadtime cumulative-efficiency cumulative-deadtime',
              file=f)
        for r in rounds:
            # extract relevant statistics
            results = [
                ('round', r.n),
                ('name', r.winner.name),
                ('window', r.winner.window),
                ('snr', r.winner.snr),
                ('significance', r.winner.significance),
                ('nveto', r.efficiency[0]),
                ('use-percentage',
                    r.use_percentage[0] / r.use_percentage[1] * 100.),
                ('efficiency', r.efficiency[0] / r.efficiency[1] * 100.),
                ('deadtime', r.deadtime[0] / r.deadtime[1] * 100.),
                ('cumulative-efficiency',
                    r.cum_efficiency[0] / r.cum_efficiency[1] * 100.),
                ('cumulative-deadtime',
                    r.cum_deadtime[0] / r.cum_deadtime[1] * 100.),
            ]
            # write to ASCII
            print(' '.join(map(str, list(zip(*results))[1])), file=f)
            # write to JSON
            results.append(('files', r.files))
            json_['rounds'].append(dict(results))
    LOGGER.debug("Summary table written to %s" % f.name)

    with open('summary-stats.json', 'w') as f:
        json.dump(json_, f, sort_keys=True)
    LOGGER.debug("Summary JSON written to %s" % f.name)

    # -- generate workflow for omega scans

    if args.omega_scans:
        omegatimes = list(map(str, sorted(numpy.unique(
            [t['time'] for r in rounds for t in r.scans]))))
        LOGGER.debug("Collected %d times to omega scan" % len(omegatimes))
        newtimes = [t for t in omegatimes if not
                    os.path.exists(os.path.join(omegadir, str(t)))]
        LOGGER.debug("%d scans already complete or in progress, %d remaining"
                     % (len(omegatimes) - len(newtimes), len(newtimes)))
        if len(newtimes) > 0:
            LOGGER.info('Creating workflow for omega scans')
            flags = batch.get_command_line_flags(
                ifo=ifo,
                ignore_state_flags=True)
            condorcmds = batch.get_condor_arguments(
                timeout=4,
                extra_commands=["request_disk='1G'"],
                gps=start)
            batch.generate_dag(
                newtimes,
                flags=flags,
                submit=True,
                outdir=omegadir,
                condor_commands=condorcmds)
            LOGGER.info('Launched {} omega scans to condor'.format(
                len(newtimes)))
        else:
            LOGGER.debug('Skipping omega scans')

    # -- write HTML and finish

    index = html.write_hveto_page(
        ifo, start, end, rounds, plots,
        winners=[r.winner.name for r in rounds], **htmlv)
    LOGGER.debug("HTML written to %s" % index)
    LOGGER.debug("Analysis completed in %d seconds" % (time.time() - JOBSTART))
    LOGGER.info("-- Hveto complete --")
Пример #7
0
def main(args=None):

    parser = create_parser()
    args = parser.parse_args(args=args)

    # apply verbosity to logger
    args.verbose = max(5 - args.verbose, 0)
    logger.setLevel(args.verbose * 10)

    # validate command line arguments
    if args.ifo is None:
        parser.error("Cannot determine IFO prefix from sytem, "
                     "please pass --ifo on the command line")
    if args.executable is None:
        parser.error("Cannot find omicron on path, please pass "
                     "--executable on the command line")

    # validate processing options
    if all((args.skip_root_merge, args.skip_hdf5_merge, args.skip_ligolw_add,
            args.skip_gzip, not args.archive)):
        args.skip_postprocessing = True
    if args.archive:
        argsd = vars(args)
        for arg in [
                'skip-root-merge', 'skip-hdf5-merge', 'skip-ligolw-add',
                'skip-gzip'
        ]:
            if argsd[arg.replace('-', '_')]:
                parser.error("Cannot use --%s with --archive" % arg)

    # check conflicts
    if args.gps is None and args.cache_file is not None:
        parser.error("Cannot use --cache-file in 'online' mode, "
                     "please use --cache-file with --gps")

    # extract key variables
    ifo = args.ifo
    group = args.group
    online = args.gps is None

    # format file-tag as underscore-delimited upper-case string
    filetag = args.file_tag
    if filetag:
        filetag = re.sub(r'[:_\s-]', '_', filetag).rstrip('_').strip('_')
        if const.OMICRON_FILETAG.lower() in filetag.lower():
            afiletag = filetag
        else:
            afiletag = '%s_%s' % (filetag, const.OMICRON_FILETAG.upper())
        filetag = '_%s' % filetag
    else:
        filetag = ''
        afiletag = const.OMICRON_FILETAG.upper()

    logger.info("--- Welcome to the Omicron processor ---")

    # set up containers to keep track of files that we create here
    tempfiles = []
    keepfiles = []

    # check rescue against --dagman-option force
    if args.rescue and args.dagman_option.count('force') > 1:
        parser.error('--rescue is incompatible with --dagman-option force')
    elif args.rescue:
        args.dagman_option.pop(0)
        logger.info(
            "Running in RESCUE mode - the workflow will be "
            "re-generated in memory without any files being written", )

    # set omicron version for future use
    omicronv = utils.get_omicron_version(args.executable)
    const.OMICRON_VERSION = str(omicronv)
    os.environ.setdefault('OMICRON_VERSION', str(omicronv))
    logger.debug('Omicron version: %s' % omicronv)

    # -- parse configuration file and get parameters --------------------------

    cp = configparser.ConfigParser()
    cp.read(args.config_file)

    # validate
    if not cp.has_section(group):
        raise configparser.NoSectionError(group)

    # get params
    channels = cp.get(group, 'channels').strip('\n').rstrip('\n').split('\n')
    try:  # allow two-column 'channel samplerate' format
        channels, crates = zip(*[c.split(' ', 1) for c in channels])
    except ValueError:
        crates = []
    else:
        crates = set(crates)
    logger.debug("%d channels read" % len(channels))
    for i in range(len(channels) - 1, -1, -1):  # remove excluded channels
        c = channels[i]
        if c in args.exclude_channel:
            channels.pop(i)
            logger.debug("    removed %r" % c)
    logger.debug("%d channels to process" % len(channels))
    cp.set(group, 'channels', '\n'.join(channels))
    frametype = cp.get(group, 'frametype')
    logger.debug("frametype = %s" % frametype)
    chunkdur = cp.getint(group, 'chunk-duration')
    logger.debug("chunkdur = %s" % chunkdur)
    segdur = cp.getint(group, 'segment-duration')
    logger.debug("segdur = %s" % segdur)
    overlap = cp.getint(group, 'overlap-duration')
    logger.debug("overlap = %s" % overlap)
    padding = int(overlap / 2)
    logger.debug("padding = %s" % padding)
    try:
        frange = tuple(map(float, cp.get(group, 'frequency-range').split()))
    except configparser.NoOptionError as e:
        try:
            flow = cp.getfloat(group, 'flow')
            fhigh = cp.getfloat(group, 'flow')
        except configparser.NoOptionError:
            raise e
        frange = (flow, fhigh)
    logger.debug('frequencyrange = [%s, %s)' % tuple(frange))
    try:
        sampling = cp.getfloat(group, 'sample-frequency')
    except configparser.NoOptionError:
        if len(crates) == 1:
            sampling = float(crates[0])
        elif len(crates) > 1:
            raise ValueError(
                "No sample-frequency parameter given, and multiple "
                "sample frequencies parsed from channels list, "
                "cannot continue", )
        else:
            sampling = None
    if sampling:
        logger.debug('samplingfrequency = %s' % sampling)

    # get state channel
    try:
        statechannel = cp.get(group, 'state-channel')
    except configparser.NoOptionError:
        statechannel = None
    else:
        try:
            statebits = list(
                map(
                    float,
                    cp.get(group, 'state-bits').split(','),
                ))
        except configparser.NoOptionError:
            statebits = [0]
        try:
            stateft = cp.get(group, 'state-frametype')
        except configparser.NoOptionError as e:
            e.args = ('%s, this must be specified if state-channel is given' %
                      str(e), )
            raise

    # get state flag (if given)
    try:
        stateflag = cp.get(group, 'state-flag')
    except configparser.NoOptionError:
        stateflag = None
    else:
        logger.debug("State flag = %s" % stateflag)
        if not statechannel:  # map state flag to state channel
            try:
                statechannel, statebits, stateft = (
                    segments.STATE_CHANNEL[stateflag])
            except KeyError as e:
                if online or args.no_segdb:  # only raise if channel required
                    e.args = ('Cannot map state flag %r to channel' %
                              stateflag, )
                    raise
                else:
                    pass

    if statechannel:
        logger.debug("State channel = %s" % statechannel)
        logger.debug("State bits = %s" % ', '.join(map(str, statebits)))
        logger.debug("State frametype = %s" % stateft)

    # parse padding for state segments
    if statechannel or stateflag:
        try:
            statepad = cp.get(group, 'state-padding')
        except configparser.NoOptionError:
            statepad = (0, 0)
        else:
            try:
                p = int(statepad)
            except ValueError:
                statepad = tuple(map(float, statepad.split(',', 1)))
            else:
                statepad = (p, p)
        logger.debug("State padding: %s" % str(statepad))

    rundir = utils.get_output_path(args)

    # convert to omicron parameters format
    oconfig = parameters.OmicronParameters.from_channel_list_config(
        cp, group, version=omicronv)
    # and validate things
    oconfig.validate()

    # -- set directories ------------------------------------------------------

    rundir.mkdir(exist_ok=True, parents=True)
    logger.info("Using run directory\n%s" % rundir)

    cachedir = rundir / "cache"
    condir = rundir / "condor"
    logdir = rundir / "logs"
    pardir = rundir / "parameters"
    trigdir = rundir / "triggers"
    for d in [cachedir, condir, logdir, pardir, trigdir]:
        d.mkdir(exist_ok=True)

    oconfig.set('OUTPUT', 'DIRECTORY', str(trigdir))

    # -- check for an existing process ----------------------------------------

    dagpath = condir / "{}.dag".format(DAG_TAG)

    # check dagman lock file
    running = condor.dag_is_running(dagpath)
    if running:
        msg = "Detected {} already running in {}".format(
            dagpath,
            rundir,
        )
        if not args.reattach:
            raise RuntimeError(msg)
        logger.info("{}, will reattach".format(msg))
    else:
        args.reattach = False

    # check dagman rescue files
    nrescue = len(
        list(condir.glob("{}.rescue[0-9][0-9][0-9]".format(dagpath.name), )))
    if args.rescue and not nrescue:
        raise RuntimeError(
            "--rescue given but no rescue DAG files found for {}".format(
                dagpath, ), )
    if nrescue and not args.rescue and "force" not in args.dagman_option:
        raise RuntimeError(
            "rescue DAGs found for {} but `--rescue` not given and "
            "`--dagman-option force` not given, cannot continue".format(
                dagpath, ), )

    newdag = not args.rescue and not args.reattach

    # -- find run segment -----------------------------------------------------

    segfile = str(rundir / "segments.txt")
    keepfiles.append(segfile)

    if newdag and online:
        # get limit of available data (allowing for padding)
        end = data.get_latest_data_gps(ifo, frametype) - padding

        try:  # start from where we got to last time
            start = segments.get_last_run_segment(segfile)[1]
        except IOError:  # otherwise start with a sensible amount of data
            if args.use_dev_shm:  # process one chunk
                logger.debug("No online segment record, starting with "
                             "%s seconds" % chunkdur)
                start = end - chunkdur + padding
            else:  # process the last 4000 seconds (arbitrarily)
                logger.debug("No online segment record, starting with "
                             "4000 seconds")
                start = end - 4000
        else:
            logger.debug("Online segment record recovered")
    elif online:
        start, end = segments.get_last_run_segment(segfile)
    else:
        start, end = args.gps

    duration = end - start
    datastart = start - padding
    dataend = end + padding
    dataduration = dataend - datastart

    logger.info("Processing segment determined as")
    logger.info("    %d %d" % (datastart, dataend))
    logger.info("Duration = %d seconds" % dataduration)

    span = (start, end)

    # -- find segments and frame files ----------------------------------------

    # minimum allowed duration is one full chunk
    minduration = 1 * chunkdur

    # validate span is long enough
    if dataduration < minduration and online:
        logger.info("Segment is too short (%d < %d), please try again later" %
                    (duration, minduration))
        clean_exit(0, tempfiles)
    elif dataduration < minduration:
        raise ValueError(
            "Segment [%d, %d) is too short (%d < %d), please "
            "extend the segment, or shorten the timing parameters." %
            (start, end, duration, chunkdur - padding * 2), )

    # -- find run segments
    # get segments from state vector
    if (online and statechannel) or (statechannel
                                     and not stateflag) or (statechannel
                                                            and args.no_segdb):
        logger.info("Finding segments for relevant state...")
        if statebits == "guardian":  # use guardian
            segs = segments.get_guardian_segments(
                statechannel,
                stateft,
                datastart,
                dataend,
                pad=statepad,
            )
        else:
            segs = segments.get_state_segments(
                statechannel,
                stateft,
                datastart,
                dataend,
                bits=statebits,
                pad=statepad,
            )
    # get segments from segment database
    elif stateflag:
        logger.info("Querying segments for relevant state...")
        segs = segments.query_state_segments(stateflag,
                                             datastart,
                                             dataend,
                                             pad=statepad)
    # get segments from frame availability
    else:
        segs = segments.get_frame_segments(ifo, frametype, datastart, dataend)

    # print frame segments recovered
    if len(segs):
        logger.info("State/frame segments recovered as")
        for seg in segs:
            logger.info("    %d %d [%d]" % (seg[0], seg[1], abs(seg)))
        logger.info("Duration = %d seconds" % abs(segs))

    # if running online, we want to avoid processing up to the extent of
    # available data, so that the next run doesn't get left with a segment that
    # is too short to process
    # There are a few reasons this might be
    #   - the interferometer loses lock a short time after the end of this run
    #   - a restart/other problem means that a frame is missing a short time
    #     after the end of this run

    # so, work out whether we need to truncate:
    try:
        lastseg = segs[-1]
    except IndexError:
        truncate = False
    else:
        truncate = online and newdag and lastseg[1] == dataend

    # if final segment is shorter than two chunks, remove it entirely
    # so that it gets processed next time (when it will either a closed
    # segment, or long enough to process safely)
    if truncate and abs(lastseg) < chunkdur * 2:
        logger.info(
            "The final segment is too short, but ends at the limit of "
            "available data, presumably this is an active segment. It "
            "will be removed so that it can be processed properly later", )
        segs = type(segs)(segs[:-1])
        dataend = lastseg[0]
    # otherwise, we remove the final chunk (so that the next run has at
    # least that on which to operate), then truncate to an integer number
    # of chunks (so that # PSD estimation operates on a consistent amount
    # of data)
    elif truncate:
        logger.info("The final segment touches the limit of available data, "
                    "the end chunk will be removed to guarantee that the next "
                    "online run has enough data over which to operate")
        t, e = lastseg
        e -= chunkdur + padding  # remove one chunk
        # now truncate to an integer number of chunks
        step = chunkdur
        while t + chunkdur <= e:
            t += step
            step = chunkdur - overlap
        segs[-1] = type(segs[-1])(lastseg[0], t)
        dataend = segs[-1][1]
        logger.info("This analysis will now run to %d" % dataend)

    # recalculate the processing segment
    dataspan = type(segs)([segments.Segment(datastart, dataend)])

    # -- find the frames
    # find frames under /dev/shm (which creates a cache of temporary files)
    if args.cache_file:
        cache = read_cache(str(args.cache_file))
    # only cache if we have state segments
    elif args.use_dev_shm and len(segs):
        cache = data.find_frames(ifo,
                                 frametype,
                                 datastart,
                                 dataend,
                                 on_gaps='warn',
                                 tmpdir=cachedir)
        # remove cached files at end of process
        tempfiles.extend(filter(lambda p: str(cachedir) in p, cache))
    # find frames using datafind
    else:
        cache = data.find_frames(ifo,
                                 frametype,
                                 datastart,
                                 dataend,
                                 on_gaps='warn')

    # if not frames for an online run, panic
    if not online and len(cache) == 0:
        raise RuntimeError("No frames found for %s-%s" % (ifo[0], frametype))

    # work out the segments of data available
    try:
        cachesegs = (segments.cache_segments(cache) & dataspan).coalesce()
    except TypeError:  # empty cache
        cachesegs = type(dataspan)()
        alldata = False
    else:
        try:
            alldata = cachesegs[-1][1] >= dataspan[-1][1]
        except IndexError:  # no data overlapping span
            alldata = False

    # write cache of frames (only if creating a new DAG)
    cachefile = cachedir / "frames.lcf"
    keepfiles.append(cachefile)
    if newdag:
        data.write_cache(cache, cachefile)
    oconfig.set('DATA', 'FFL', str(cachefile))
    logger.info("Cache of %d frames written to\n%s" % (len(cache), cachefile))

    # restrict analysis to available data (and warn about missing data)
    if segs - cachesegs:
        logger.warning("Not all state times are available in frames")
    segs = (cachesegs & segs).coalesce()

    # apply minimum duration requirement
    segs = type(segs)(s for s in segs if abs(s) >= segdur)

    # if all of the data are available, but no analysable segments were found
    # (i.e. IFO not in right state for all times), record segments.txt
    if newdag and len(segs) == 0 and online and alldata:
        logger.info(
            "No analysable segments found, but up-to-date data are "
            "available. A segments.txt file will be written so we don't "
            "have to search these data again", )
        segments.write_segments(cachesegs, segfile)
        logger.info("Segments written to\n%s" % segfile)
        clean_exit(0, tempfiles)
    # otherwise not all data are available, so
    elif len(segs) == 0 and online:
        logger.info("No analysable segments found, please try again later")
        clean_exit(0, tempfiles)
    elif len(segs) == 0:
        raise RuntimeError("No analysable segments found")

    # and calculate trigger output segments
    trigsegs = type(segs)(type(s)(*s) for s in segs).contract(padding)

    # display segments
    logger.info("Final data segments selected as")
    for seg in segs:
        logger.info("    %d %d " % seg + "[%d]" % abs(seg))
    logger.info("Duration = %d seconds" % abs(segs))

    span = type(trigsegs)([trigsegs.extent()])

    logger.info("This will output triggers for")
    for seg in trigsegs:
        logger.info("    %d %d " % seg + "[%d]" % abs(seg))
    logger.info("Duration = %d seconds" % abs(trigsegs))

    # -- config omicron config directory --------------------------------------

    tempfiles.append(utils.astropy_config_path(rundir))

    # -- make parameters files then generate the DAG --------------------------

    fileformats = oconfig.output_formats()

    # generate a 'master' parameters.txt file for archival purposes
    if not newdag:  # if not writing new dag, dump parameters.txt files to /tmp
        pardir = gettempdir()
    parfile, jobfiles = oconfig.write_distributed(
        pardir, nchannels=args.max_channels_per_job)
    logger.debug("Created master parameters file\n%s" % parfile)
    if newdag:
        keepfiles.append(parfile)

    # create dag
    dag = pipeline.CondorDAG(str(logdir / "{}.log".format(DAG_TAG)))
    dag.set_dag_file(str(dagpath.with_suffix("")))

    # set up condor commands for all jobs
    condorcmds = {
        'accounting_group': args.condor_accounting_group,
        'accounting_group_user': args.condor_accounting_group_user
    }
    for cmd_ in args.condor_command:
        key, value = cmd_.split('=', 1)
        condorcmds[key.rstrip().lower()] = value.strip()

    # create omicron job
    reqmem = condorcmds.pop('request_memory', 1000)
    ojob = condor.OmicronProcessJob(args.universe,
                                    args.executable,
                                    subdir=condir,
                                    logdir=logdir,
                                    **condorcmds)
    ojob.add_condor_cmd('request_memory', reqmem)
    ojob.add_condor_cmd('+OmicronProcess', '"%s"' % group)

    # create post-processing job
    ppjob = condor.OmicronProcessJob(args.universe,
                                     find_executable('bash'),
                                     subdir=condir,
                                     logdir=logdir,
                                     tag='post-processing',
                                     **condorcmds)
    ppjob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group)
    ppjob.add_short_opt('e', '')
    ppnodes = []
    rootmerge = find_executable('omicron-root-merge')
    hdf5merge = find_executable('omicron-hdf5-merge')
    ligolw_add = find_executable('ligolw_add')
    gzip = find_executable('gzip')

    # create node to remove files
    rmjob = condor.OmicronProcessJob(args.universe,
                                     str(condir / "post-process-rm.sh"),
                                     subdir=condir,
                                     logdir=logdir,
                                     tag='post-processing-rm',
                                     **condorcmds)
    rm = find_executable('rm')
    rmfiles = []
    rmjob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group)

    if args.archive:
        archivejob = condor.OmicronProcessJob(args.universe,
                                              str(condir / "archive.sh"),
                                              subdir=condir,
                                              logdir=logdir,
                                              tag='archive',
                                              **condorcmds)
        archivejob.add_condor_cmd('+OmicronPostProcess', '"%s"' % group)
        archivefiles = {}

    # loop over data segments
    for s, e in segs:

        # build trigger segment
        ts = s + padding
        te = e - padding
        td = te - ts

        # distribute segment across multiple nodes
        nodesegs = oconfig.distribute_segment(s,
                                              e,
                                              nperjob=args.max_chunks_per_job)

        omicronfiles = {}

        # build node for each parameter file
        for i, pf in enumerate(jobfiles):
            chanlist = jobfiles[pf]
            nodes = []
            # loop over distributed segments
            for subseg in nodesegs:
                if not args.skip_omicron:
                    # work out files for this job
                    nodefiles = oconfig.output_files(*subseg)
                    # build node
                    node = pipeline.CondorDAGNode(ojob)
                    node.set_category('omicron')
                    node.set_retry(args.condor_retry)
                    node.add_var_arg(str(subseg[0]))
                    node.add_var_arg(str(subseg[1]))
                    node.add_file_arg(pf)
                    for chan in chanlist:
                        for form, flist in nodefiles[chan].items():
                            # record file as output from this node
                            for f in flist:
                                node._CondorDAGNode__output_files.append(f)
                            # record file as output for this channel
                            try:
                                omicronfiles[chan][form].extend(flist)
                            except KeyError:
                                try:
                                    omicronfiles[chan][form] = flist
                                except KeyError:
                                    omicronfiles[chan] = {form: flist}
                    dag.add_node(node)
                    nodes.append(node)

            # post-process (one post-processing job per channel
            #               per data segment)
            if not args.skip_postprocessing:
                script = condir / "post-process-{}-{}-{}.sh".format(i, s, e)
                ppnode = pipeline.CondorDAGNode(ppjob)
                ppnode.add_var_arg(str(script))
                operations = []

                # build post-processing nodes for each channel
                for c in chanlist:
                    operations.append('\n# %s' % c)
                    chandir = trigdir / c
                    # work out filenames for coalesced files
                    archpath = Path(
                        io.get_archive_filename(
                            c,
                            ts,
                            td,
                            filetag=afiletag,
                            ext='root',
                        ))
                    mergepath = chandir / archpath.name
                    target = str(archpath.parent)

                    # add ROOT operations
                    if 'root' in fileformats:
                        rootfiles = ' '.join(omicronfiles[c]['root'])
                        for f in omicronfiles[c]['root']:
                            ppnode._CondorDAGNode__input_files.append(f)
                        if args.skip_root_merge or (len(
                                omicronfiles[c]['root']) == 1):
                            root = rootfiles
                        else:
                            root = str(mergepath)
                            operations.append('%s %s %s --strict' %
                                              (rootmerge, rootfiles, root))
                            rmfiles.append(rootfiles)
                            ppnode._CondorDAGNode__output_files.append(root)
                        if args.archive:
                            try:
                                archivefiles[target].append(root)
                            except KeyError:
                                archivefiles[target] = [root]
                            rmfiles.append(root)

                    # add HDF5 operations
                    if 'hdf5' in fileformats:
                        hdf5files = ' '.join(omicronfiles[c]['hdf5'])
                        for f in omicronfiles[c]['hdf5']:
                            ppnode._CondorDAGNode__input_files.append(f)
                        if args.skip_hdf5_merge or (len(
                                omicronfiles[c]['hdf5']) == 1):
                            hdf5 = hdf5files
                        else:
                            hdf5 = str(mergepath.with_suffix(".h5"))
                            operations.append(
                                '{cmd} {infiles} {outfile}'.format(
                                    cmd=hdf5merge,
                                    infiles=hdf5files,
                                    outfile=hdf5,
                                ), )
                            rmfiles.append(hdf5files)
                            ppnode._CondorDAGNode__output_files.append(hdf5)
                        if args.archive:
                            try:
                                archivefiles[target].append(hdf5)
                            except KeyError:
                                archivefiles[target] = [hdf5]
                            rmfiles.append(hdf5)

                    # add LIGO_LW operations
                    if 'xml' in fileformats:
                        xmlfiles = ' '.join(omicronfiles[c]['xml'])
                        for f in omicronfiles[c]['xml']:
                            ppnode._CondorDAGNode__input_files.append(f)
                        if (args.skip_ligolw_add
                                or len(omicronfiles[c]['xml']) == 1):
                            xml = xmlfiles
                        else:
                            xml = str(mergepath.with_suffix(".xml"))
                            operations.append(
                                '%s %s --ilwdchar-compat --output %s' %
                                (ligolw_add, xmlfiles, xml), )
                            rmfiles.append(xmlfiles)
                            ppnode._CondorDAGNode__output_files.append(xml)

                        if not args.skip_gzip:
                            operations.append(
                                '%s --force --stdout %s > %s.gz' %
                                (gzip, xml, xml))
                            rmfiles.append(xml)
                            xml = str(mergepath.with_suffix(".xml.gz"))
                            ppnode._CondorDAGNode__output_files.append(xml)

                        if args.archive:
                            try:
                                archivefiles[target].append(xml)
                            except KeyError:
                                archivefiles[target] = [xml]
                            rmfiles.append(xml)

                    # add ASCII operations
                    if 'txt' in fileformats:
                        txtfiles = ' '.join(omicronfiles[c]['txt'])
                        for f in omicronfiles[c]['txt']:
                            ppnode._CondorDAGNode__input_files.append(f)
                        if args.archive:
                            try:
                                archivefiles[target].append(txtfiles)
                            except KeyError:
                                archivefiles[target] = [txtfiles]
                            rmfiles.append(txtfiles)

                ppnode.set_category('postprocessing')
                ppnode.set_retry(str(args.condor_retry))
                if not args.skip_omicron:
                    for node in nodes:
                        ppnode.add_parent(node)
                dag.add_node(ppnode)
                ppnodes.append(ppnode)
                tempfiles.append(script)

                # write post-processing file
                if not args.rescue:
                    with script.open("w") as f:
                        # add header
                        print('#!/bin/bash -e\n#', file=f)
                        print("# omicron-process post-processing", file=f)
                        print(
                            '#\n# File created by\n# {}\n#'.format(
                                ' '.join(sys.argv), ),
                            file=f,
                        )
                        print("# Group: %s" % group, file=f)
                        print("# Segment: [%d, %d)" % (s, e), file=f)
                        print("# Channels:\n#", file=f)
                        for c in chanlist:
                            print('# %s' % c, file=f)
                        # add post-processing operations
                        print('\n'.join(operations), file=f)
                    if newdag:
                        script.chmod(0o755)

    # set 'strict' option for Omicron
    # this is done after the nodes are written so that 'strict' is last in
    # the call
    ojob.add_arg('strict')

    # do all archiving last, once all post-processing has completed
    if args.archive:
        archivenode = pipeline.CondorDAGNode(archivejob)
        acache = {fmt: list() for fmt in fileformats}
        if newdag:
            # write shell script to seed archive
            with open(archivejob.get_executable(), 'w') as f:
                print('#!/bin/bash -e\n', file=f)
                for gpsdir, filelist in archivefiles.items():
                    for fn in filelist:
                        archivenode._CondorDAGNode__input_files.append(fn)
                    # write 'mv' op to script
                    print("mkdir -p %s" % gpsdir, file=f)
                    print("cp %s %s" % (' '.join(filelist), gpsdir), file=f)
                    # record archived files in caches
                    filenames = [
                        str(Path(gpsdir) / x.name)
                        for x in map(Path, filelist)
                    ]
                    for fn in filenames:
                        archivenode._CondorDAGNode__output_files.append(fn)
                    for fmt, extensions in {
                            'xml': ('.xml.gz', '.xml'),
                            'root': '.root',
                            'hdf5': '.h5',
                            'txt': '.txt',
                    }.items():
                        try:
                            acache[fmt].extend(
                                filter(lambda x: x.endswith(extensions),
                                       filenames))
                        except KeyError:  # file format not used
                            continue
            os.chmod(archivejob.get_executable(), 0o755)
            # write caches to disk
            for fmt, fcache in acache.items():
                cachefile = cachedir / "omicron-{0}.lcf".format(fmt)
                data.write_cache(fcache, cachefile)
                logger.debug("{0} cache written to {1}".format(fmt, cachefile))
        # add node to DAG
        for node in ppnodes:
            archivenode.add_parent(node)
        archivenode.set_retry(args.condor_retry)
        archivenode.set_category('archive')
        dag.add_node(archivenode)
        tempfiles.append(archivejob.get_executable())

    # add rm job right at the end
    rmnode = pipeline.CondorDAGNode(rmjob)
    rmscript = rmjob.get_executable()
    with open(rmscript, 'w') as f:
        print('#!/bin/bash -e\n#', file=f)
        print("# omicron-process post-processing-rm", file=f)
        print('#\n# File created by\n# %s\n#' % ' '.join(sys.argv), file=f)
        print("# Group: %s" % group, file=f)
        print("# Segment: [%d, %d)" % (s, e), file=f)
        print("# Channels:\n#", file=f)
        for c in channels:
            print('# %s' % c, file=f)
        print('', file=f)
        for rmset in rmfiles:
            print('%s -f %s' % (rm, rmset), file=f)
    if newdag:
        os.chmod(rmscript, 0o755)
    tempfiles.append(rmscript)
    rmnode.set_category('postprocessing')
    if args.archive:  # run this after archiving
        rmnode.add_parent(archivenode)
    else:  # or just after post-processing if not archiving
        for node in ppnodes:
            rmnode.add_parent(node)
    dag.add_node(rmnode)

    # print DAG to file
    dagfile = Path(dag.get_dag_file()).resolve(strict=False)
    if args.rescue:
        logger.info(
            "In --rescue mode, this DAG has been reproduced in memory "
            "for safety, but will not be written to disk, the file is:", )
    elif newdag:
        dag.write_sub_files()
        dag.write_dag()
        dag.write_script()
        with open(dagfile, 'a') as f:
            print("DOT", dagfile.with_suffix(".dot"), file=f)
        logger.info("Dag with %d nodes written to" % len(dag.get_nodes()))
        print(dagfile)

    if args.no_submit:
        if newdag:
            segments.write_segments(span, segfile)
            logger.info("Segments written to\n%s" % segfile)
        sys.exit(0)

    # -- submit the DAG and babysit -------------------------------------------

    # submit DAG
    if args.rescue:
        logger.info("--- Submitting rescue DAG to condor ----")
    elif args.reattach:
        logger.info("--- Reattaching to existing DAG --------")
    else:
        logger.info("--- Submitting DAG to condor -----------")

    for i in range(args.submit_rescue_dag + 1):
        if args.reattach:  # find ID of existing DAG
            dagid = int(
                condor.find_job(Owner=getuser(),
                                OmicronDAGMan=group)['ClusterId'])
            logger.info("Found existing condor ID = %d" % dagid)
        else:  # or submit DAG
            dagmanargs = set()
            if online:
                dagmanopts = {'-append': '+OmicronDAGMan=\"%s\"' % group}
            else:
                dagmanopts = {}
            for x in args.dagman_option:
                x = '-%s' % x
                try:
                    key, val = x.split('=', 1)
                except ValueError:
                    dagmanargs.add(x)
                else:
                    dagmanopts[key] = val
            dagid = condor.submit_dag(
                str(dagfile),
                *list(dagmanargs),
                **dagmanopts,
            )
            logger.info("Condor ID = %d" % dagid)
            # write segments now -- this means that online processing will
            # _always_ move on even if the workflow fails
            if i == 0:
                segments.write_segments(span, segfile)
                logger.info("Segments written to\n%s" % segfile)
            if 'force' in args.dagman_option:
                args.dagman_option.pop(args.dagman_option.index('force'))

        # monitor the dag
        logger.debug("----------------------------------------")
        logger.info("Monitoring DAG:")
        check_call([
            "pycondor",
            "monitor",
            "--time",
            "5",
            "--length",
            "36",
            str(dagfile),
        ])
        print()
        logger.debug("----------------------------------------")
        sleep(5)
        try:
            stat = condor.get_dag_status(dagid)
        except OSError as exc:  # query failed
            logger.warning(str(exc))
            stat = {}

        # log exitcode
        if "exitcode" not in stat:
            logger.warning("DAG has exited, status unknown")
            break
        if not stat["exitcode"]:
            logger.info("DAG has exited with status {}".format(
                stat.get("exitcode", "unknown"), ))
            break
        logger.critical(
            "DAG has exited with status {}".format(stat['exitcode']), )

        # handle failure
        if i == args.submit_rescue_dag:
            raise RuntimeError("DAG has failed to complete %d times" %
                               (args.submit_rescue_dag + 1))
        else:
            rescue = condor.find_rescue_dag(str(dagfile))
            logger.warning("Rescue DAG %s was generated" % rescue)

    # mark output and error files of condor nodes that passed to be deleted
    try:
        for node, files in condor.get_out_err_files(dagid, exitcode=0).items():
            tempfiles.extend(files)
    except RuntimeError:
        pass

    # archive files
    stub = '%d-%d' % (start, end)
    for f in map(Path, ["{}.dagman.out".format(dagfile)] + keepfiles):
        archive = logdir / "{0[0]}.{1}.{0[1]}".format(
            f.name.split(".", 1),
            stub,
        )
        if str(f) == str(segfile):
            shutil.copyfile(f, archive)
        else:
            f.rename(archive)
        logger.debug("Archived path\n{} --> {}".format(f, archive))

    # clean up temporary files
    tempfiles.extend(trigdir.glob("ffconvert.*.ffl"))
    clean_tempfiles(tempfiles)

    # and exit
    logger.info("--- Processing complete ----------------")