def process_state(self, state, *args, **kwargs):
    config = kwargs.get('config', None)
    # first get all of the segments
    if self.segmentfile:
        get_segments(self.flags, state, config=config,
                     cache=self.segmentfile, return_=False)
        segs = get_segments(self.metaflag, state, padding=self.padding,
                            config=config, query=False)
        kwargs['segmentcache'] = Cache()
    else:
        segs = get_segments(self.metaflag, state, config=config,
                            segdb_error=kwargs.get('segdb_error', 'raise'),
                            padding=self.padding)
    # then get all of the triggers
    if self.channel:
        cache = kwargs.pop('trigcache', None)
        before = get_triggers(str(self.channel), self.etg, state,
                              config=config, cache=cache,
                              format=self.etgformat)
    else:
        before = None
    # then apply all of the metrics
    self.results[state] = evaluate_flag(
        segs, triggers=before, metrics=self.metrics, injections=None,
        minduration=self.minseglength, vetotag=str(state),
        channel=str(self.channel), etg=self.etg)[0]
    vprint("    Veto evaluation results:\n")
    for metric, val in self.results[state].iteritems():
        vprint('        %s: %s\n' % (metric, val))
    # then pass to super to make the plots
    kwargs['trigcache'] = Cache()
    return super(FlagTab, self).process_state(state, *args, **kwargs)
def find_trigger_urls(channel, etg, gpsstart, gpsend, verbose=False):
    """Find the paths of trigger files that represent the given
    observatory, channel, and ETG (event trigger generator) for a given
    GPS [start, end) segment.
    """
    if etg.lower() == 'omicron':
        etg = '?micron'

    # construct search
    span = Segment(gpsstart, gpsend)
    ifo, channel = channel.split(':', 1)
    trigtype = "%s_%s" % (channel, etg.lower())
    epoch = '*'
    searchbase = os.path.join(TRIGFIND_BASE_PATH, epoch, ifo, trigtype)
    gpsdirs = range(int(str(gpsstart)[:5]), int(str(gpsend)[:5]) + 1)
    trigform = ('%s-%s_%s-%s-*.xml*'
                % (ifo, re_dash.sub('_', channel), etg.lower(),
                   '[0-9]' * 10))

    # perform and cache results
    out = Cache()
    for gpsdir in gpsdirs:
        gpssearchpath = os.path.join(searchbase, str(gpsdir), trigform)
        if verbose:
            gprint("Searching %s..." % os.path.split(gpssearchpath)[0],
                   end=' ')
        gpscache = Cache(map(CacheEntry.from_T050017,
                             glob.glob(gpssearchpath)))
        matched = gpscache.sieve(segment=span)
        out.extend(matched)
        if verbose:
            gprint("%d found" % len(matched))
    out.sort(key=lambda e: e.path)
    return out
def read_cache(cache, segments, etg, nproc=1, timecolumn=None, **kwargs):
    """Read a table of events from a cache

    This function is mainly meant for use from the `get_triggers` method

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`
        the formatted list of files to read
    segments : `~gwpy.segments.SegmentList`
        the list of segments to read
    etg : `str`
        the name of the trigger generator that created the files
    nproc : `int`, optional
        the number of parallel processes to use when reading
    **kwargs
        other keyword arguments are passed to the `EventTable.read`
        or `{tableclass}.read` methods

    Returns
    -------
    table : `~gwpy.table.EventTable`, `None`
        a table of events, or `None` if the cache has no overlap with
        the segments
    """
    if isinstance(cache, Cache):
        cache = cache.sieve(segmentlist=segments)
        cache = cache.checkfilesexist()[0]
        cache.sort(key=lambda x: x.segment[0])
        cache = cache.pfnlist()  # some readers only like filenames
    else:
        cache = [urlparse(url).path for url in cache]
    if etg == 'pycbc_live':  # remove empty HDF5 files
        cache = filter_pycbc_live_files(cache, ifo=kwargs['ifo'])

    if len(cache) == 0:
        return

    # read triggers
    table = EventTable.read(cache, **kwargs)

    # store read keywords in the meta table
    if timecolumn:
        table.meta['timecolumn'] = timecolumn

    # get back from cache entry
    if isinstance(cache, CacheEntry):
        cache = Cache([cache])

    # append new events to existing table
    try:
        csegs = cache_segments(cache) & segments
    except (AttributeError, TypeError, ValueError):
        csegs = SegmentList()
    table.meta['segments'] = csegs

    if timecolumn:  # already filtered on-the-fly
        return table
    # filter now
    return keep_in_segments(table, segments, etg)
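# -- usage sketch (illustrative, not part of the module) ----------------------
# A hedged example of driving read_cache() above directly; the file name,
# segment span, and read keywords are placeholder assumptions only.
def _example_read_cache():
    span = SegmentList([Segment(1187000000, 1187100000)])
    cache = Cache([CacheEntry.from_T050017(
        'L1-OMICRON-1187000000-100000.xml.gz')])
    # returns None if the cache does not overlap the segments
    return read_cache(cache, span, 'omicron', format='ligolw',
                      tablename='sngl_burst')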
def find_kw(channel, start, end, base=None):
    """Find KW trigger XML files
    """
    span = Segment(to_gps(start), to_gps(end))
    channel = get_channel(channel)
    ifo = channel.ifo
    if base is None and channel.name.split(':', 1)[-1] == 'GDS-CALIB_STRAIN':
        tag = '%s-KW_HOFT' % ifo[0].upper()
        base = '/gds-%s/dmt/triggers/%s' % (ifo.lower(), tag)
    elif base is None:
        tag = '%s-KW_TRIGGERS' % ifo[0].upper()
        base = '/gds-%s/dmt/triggers/%s' % (ifo.lower(), tag)
    gps5 = int('%.5s' % start)
    end5 = int('%.5s' % end)
    out = Cache()
    append = out.append
    while gps5 <= end5:
        trigglob = os.path.join(
            base, '%s-%d' % (tag, gps5), '%s-*-*.xml' % tag)
        found = glob.glob(trigglob)
        for f in found:
            ce = CacheEntry.from_T050017(f)
            if ce.segment.intersects(span):
                append(ce)
        gps5 += 1
    out.sort(key=lambda e: e.path)
    vprint("    Found %d files for %s (KW)\n"
           % (len(out), channel.ndsname))
    return out
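# A hedged, worked illustration of the five-digit GPS bucketing used above:
# '%.5s' truncates the GPS time to its first five digits, grouping files into
# ~100000-second (~1.16 day) directories. The GPS values are illustrative.
def _example_gps_bucketing():
    assert int('%.5s' % 1187000000) == 11870
    assert int('%.5s' % 1187200000) == 11872
    # so a [1187000000, 1187200000) query scans prefixes 11870, 11871, 11872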
def read_cache(lcf, coltype=LIGOTimeGPS):
    """Read a LAL-format cache file into memory as a `Cache`

    Parameters
    ----------
    lcf : `str`, `file`
        input file or file path to read
    coltype : `LIGOTimeGPS`, `int`, optional
        `type` for GPS times

    Returns
    -------
    cache : :class:`glue.lal.Cache`
        a cache object, representing each line in the file as a
        :class:`~lal.utils.CacheEntry`
    """
    from glue.lal import Cache

    # open file, then recurse with the open file object
    if not isinstance(lcf, FILE_LIKE):
        with open(lcf, 'r') as f:
            return read_cache(f, coltype=coltype)

    # read file
    out = Cache()
    for line in lcf:
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        out.append(out.entry_class(line, coltype=coltype))
    return out
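# -- usage sketch (illustrative) ----------------------------------------------
# A minimal, hedged example of reading a LAL-format cache file from disk with
# plain-integer GPS columns; 'frames.lcf' is a placeholder path.
def _example_read_lal_cache():
    cache = read_cache('frames.lcf', coltype=int)
    print('%d entries read' % len(cache))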
def find_dmt_omega(channel, start, end, base=None):
    """Find DMT-Omega trigger XML files
    """
    span = Segment(to_gps(start), to_gps(end))
    channel = get_channel(channel)
    ifo = channel.ifo
    if base is None and channel.name.split(':', 1)[-1] == 'GDS-CALIB_STRAIN':
        base = '/gds-%s/dmt/triggers/%s-HOFT_Omega' % (
            ifo.lower(), ifo[0].upper())
    elif base is None:
        raise NotImplementedError("This method doesn't know how to locate "
                                  "DMT Omega trigger files for %r"
                                  % str(channel))
    gps5 = int('%.5s' % start)
    end5 = int('%.5s' % end)
    out = Cache()
    append = out.append
    while gps5 <= end5:
        trigglob = os.path.join(
            base, str(gps5), '%s-%s_%s_%s_OmegaC-*-*.xml' % (
                ifo, channel.system, channel.subsystem, channel.signal))
        found = glob.glob(trigglob)
        for f in found:
            ce = CacheEntry.from_T050017(f)
            if ce.segment.intersects(span):
                append(ce)
        gps5 += 1
    out.sort(key=lambda e: e.path)
    vprint("    Found %d files for %s (DMT-Omega)\n"
           % (len(out), channel.ndsname))
    return out
def find_trigger_urls(channel, etg, gpsstart, gpsend, verbose=False,
                      **kwargs):
    """Find the paths of trigger files that represent the given
    observatory, channel, and ETG (event trigger generator) for a given
    GPS [start, end) segment.
    """
    # special case for KW
    if etg.lower() in ['kw', 'kleinewelle']:
        from .kw import find_dmt_cache
        ifo = channel.split(':')[0]
        kwargs.setdefault('extension', 'xml')
        kwargs.setdefault('check_files', True)
        return find_dmt_cache(gpsstart, gpsend, ifo, **kwargs)
    elif etg.lower() == 'omega':
        from .omega import find_dmt_cache
        ifo = channel.split(':')[0]
        kwargs.setdefault('check_files', True)
        return find_dmt_cache(gpsstart, gpsend, ifo, **kwargs)
    elif etg.lower() == 'omicron':
        etg = '?micron'

    # construct search
    span = segments.segment(gpsstart, gpsend)
    ifo, channel = channel.split(':', 1)
    trigtype = "%s_%s" % (channel, etg.lower())
    epoch = '*'
    searchbase = os.path.join(TRIGFIND_BASE_PATH, epoch, ifo, trigtype)
    gpsdirs = numpy.arange(int(str(gpsstart)[:5]),
                           int(str(gpsend)[:5]) + 1)
    trigform = ('%s-%s_%s-%s-*.xml*'
                % (ifo, re.sub('-', '_', channel), etg.lower(),
                   '[0-9]' * 10))

    # perform and cache results
    out = Cache()
    for gpsdir in gpsdirs:
        gpssearchpath = os.path.join(searchbase, str(gpsdir), trigform)
        if verbose:
            sys.stdout.write("Searching %s..."
                             % os.path.split(gpssearchpath)[0])
            sys.stdout.flush()
        gpscache = Cache(map(CacheEntry.from_T050017,
                             glob.glob(gpssearchpath)))
        matched = gpscache.sieve(segment=span)
        out.extend(matched)
        if verbose:
            sys.stdout.write(" %d found\n" % len(matched))
    out.sort(key=lambda e: e.path)
    return out
def read_cache(cache, segments, etg, nproc=1, timecolumn=None, **kwargs):
    """Read a table of events from a cache

    This function is mainly meant for use from the `get_triggers` method

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`
        the formatted list of files to read
    segments : `~gwpy.segments.SegmentList`
        the list of segments to read
    etg : `str`
        the name of the trigger generator that created the files
    nproc : `int`, optional
        the number of parallel processes to use when reading
    **kwargs
        other keyword arguments are passed to the `EventTable.read`
        or `{tableclass}.read` methods

    Returns
    -------
    table : `~gwpy.table.EventTable`, `None`
        a table of events, or `None` if the cache has no overlap with
        the segments
    """
    if isinstance(cache, Cache):
        cache = cache.sieve(segmentlist=segments)
        cache = cache.checkfilesexist()[0]
        cache.sort(key=lambda x: x.segment[0])
        if etg == 'pycbc_live':  # remove empty HDF5 files
            cache = type(cache)(
                filter_pycbc_live_files(cache, ifo=kwargs['ifo']))

    # if no files, skip
    if len(cache) == 0:
        return

    # use multiprocessing except for ascii reading
    # (since astropy doesn't allow it)
    if kwargs.get('format', 'none').startswith('ascii.'):
        cache = cache.pfnlist()
    else:
        kwargs['nproc'] = nproc
    if len(cache) == 1:
        cache = cache[0]

    # read triggers
    table = EventTable.read(cache, **kwargs)
    if timecolumn:
        table.meta['timecolumn'] = timecolumn

    # get back from cache entry
    if isinstance(cache, CacheEntry):
        cache = Cache([cache])

    # append new events to existing table
    try:
        csegs = cache_segments(cache)
    except (AttributeError, TypeError):
        csegs = SegmentList()
    table.meta['segments'] = csegs

    return keep_in_segments(table, segments, etg)
def process(self, nds=None, nproc=1, config=GWSummConfigParser(),
            datacache=None, datafind_error='raise', **kwargs):
    """Process time accounting data
    """
    for p in self.plots:
        if p.outputfile in globalv.WRITTEN_PLOTS:
            p.new = False

    # get archived GPS time
    tag = self.segmenttag % list(self.modes)[0]
    try:
        lastgps = globalv.SEGMENTS[tag].known[-1][-1]
    except (IndexError, KeyError):
        lastgps = self.span[0]

    # get data
    new = SegmentList([type(self.span)(lastgps, self.span[1])])
    data = get_timeseries(self.channel, new, config=config, nds=nds,
                          dtype='int16', datafind_error=datafind_error,
                          nproc=nproc, cache=datacache)
    vprint("    All time-series data loaded\n")

    # find segments
    for ts in data:
        modesegments = DataQualityDict()
        for idx, name in self.modes.items():
            # get segments for state
            tag = self.segmenttag % idx
            instate = ts == idx * ts.unit
            modesegments[tag] = instate.to_dqflag(name=name.strip('*'))
            # append segments for group
            group = int(idx // 10. * 10)
            gtag = self.segmenttag % group
            try:
                modesegments[gtag] += modesegments[tag]
            except KeyError:
                modesegments[gtag] = modesegments[tag]
        globalv.SEGMENTS += modesegments

    kwargs['segdb_error'] = 'ignore'
    super(AccountingTab, self).process(
        config=config, nds=nds, nproc=nproc, segmentcache=Cache(),
        datacache=datacache, datafind_error=datafind_error, **kwargs)
def test_get_coherence_spectrum(self):
    cache = Cache([e for c in self.FRAMES for e in self.FRAMES[c]])
    a = data.get_coherence_spectrogram(
        ('H1:LOSC-STRAIN', 'L1:LOSC-STRAIN'), LOSC_SEGMENTS,
        cache=cache, stride=4, fftlength=2, overlap=1)
def process(self, *args, **kwargs):
    # read the segment files
    if os.path.isfile(self.segmentfile):
        segs = DataQualityFlag.read(self.segmentfile, coalesce=False)
        self.states[0].known = segs.known
        self.states[0].active = segs.active
        self.states[0].ready = True
    else:
        warn('Segment file %s not found.' % self.segmentfile)
        return
    if len(self.states[0].active) == 0:
        warn('No segments analysed by daily ahope.')
        return

    # read the cache files
    if os.path.isfile(self.inspiralcachefile):
        with open(self.inspiralcachefile, 'r') as fobj:
            try:
                self.inspiralcache = Cache.fromfile(fobj).sieve(
                    segment=self.span)
            except ValueError as e:
                if "could not convert '\\n' to CacheEntry" in str(e):
                    self.inspiralcache = Cache()
                else:
                    raise
    else:
        warn("Cache file %s not found." % self.inspiralcachefile)
        return
    if os.path.isfile(self.tmpltbankcachefile):
        with open(self.tmpltbankcachefile, 'r') as fobj:
            try:
                self.tmpltbankcache = Cache.fromfile(fobj).sieve(
                    segment=self.span)
            except ValueError as e:
                if "could not convert '\\n' to CacheEntry" in str(e):
                    self.tmpltbankcache = Cache()
                else:
                    raise
    else:
        warn("Cache file %s not found." % self.tmpltbankcachefile)
        self.tmpltbankcache = Cache()

    # only process if the cache file was found
    super(DailyAhopeTab, self).process(*args, **kwargs)
def setup_class(cls):
    cls.FRAMES = {}
    cls._tempdir = tempfile.mkdtemp(prefix='gwsumm-test-data-')
    # get data
    for channel in LOSC_DATA:
        cls.FRAMES[channel] = Cache()
        for gwf in LOSC_DATA[channel]:
            target = os.path.join(cls._tempdir, os.path.basename(gwf))
            download(gwf, target)
            cls.FRAMES[channel].append(CacheEntry.from_T050017(target))
def make_cache():
    try:
        from lal.utils import CacheEntry
    except ImportError as e:
        pytest.skip(str(e))

    segs = SegmentList()
    cache = Cache()
    for seg in [(0, 1), (1, 2), (4, 5)]:
        d = seg[1] - seg[0]
        f = 'A-B-%d-%d.tmp' % (seg[0], d)
        cache.append(CacheEntry.from_T050017(f, coltype=int))
        segs.append(Segment(*seg))
    return cache, segs
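# A hedged illustration of the fixture above: each T050017 name
# 'A-B-<gps>-<dur>.tmp' parses to observatory 'A', tag 'B', and the matching
# [gps, gps+dur) segment, so the cache and segment list line up one-to-one.
def _example_make_cache():
    cache, segs = make_cache()
    assert len(cache) == len(segs) == 3
    assert abs(segs) == 3  # total livetime: (0,1) + (1,2) + (4,5)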
def find_online_cache(start, end, channel, **kwargs):
    """Find ExcessPower files from the online GSTLAL analysis
    for the given span

    @param start
        GPS start time for search
    @param end
        GPS end time for search
    @param channel UNDOCUMENTED
    @param kwargs UNDOCUMENTED
        'ifo' observatory for search
        'clustering' tag for clustering stage to search,
        default: unclustered
        'check_files' check that the returned files can be read on disk,
        default False
    """
    out = Cache()

    # set base directory
    directory = kwargs.pop("directory", ER3_RUN_DIRECTORY)
    ifo, channel = channel.split(":", 1)
    channel_dir = os.path.join(directory, ifo, "%s_excesspower" % channel)

    glob_query = "%s-%s_excesspower-*.xml" % (ifo,
                                              channel.replace("-", "_"))

    span = Segment(start, end)

    # optimise
    append = out.append
    splitext = os.path.splitext
    isfile = os.path.isfile
    pjoin = os.path.join
    intersects = span.intersects
    from_T050017 = CacheEntry.from_T050017

    # loop over days gathering files
    t = start // 1e4 * 1e4
    while t < end:
        gps_dir = os.path.join(channel_dir, "%.6s" % t)
        if os.path.isdir(gps_dir):
            file_list = glob(os.path.join(gps_dir, glob_query))
            for f in file_list:
                e = from_T050017(f)
                if intersects(e.segment):
                    append(e)
        t += 1e4
    out.sort(key=lambda e: e.segment[0])
    return out
def find_trigger_urls(channel, etg, gpsstart, gpsend, verbose=False):
    """Find the paths of trigger files that represent the given
    observatory, channel, and ETG (event trigger generator) for a given
    GPS [start, end) segment.
    """
    if etg.lower().startswith('omicron'):
        etg = '?' + etg[1:]

    # construct search
    gpsstart = to_gps(gpsstart).seconds
    gpsend = to_gps(gpsend).seconds
    span = Segment(gpsstart, gpsend)
    ifo, channel = channel.split(':', 1)
    trigtype = "%s_%s" % (channel, etg.lower())
    epoch = '*'
    searchbase = os.path.join(TRIGFIND_BASE_PATH, epoch, ifo, trigtype)
    gpsdirs = range(int(str(gpsstart)[:5]), int(str(gpsend)[:5]) + 1)
    trigform = ('%s-%s_%s-%s-*.xml*'
                % (ifo, re_dash.sub('_', channel), etg.lower(),
                   '[0-9]' * 10))

    # test for channel-level directory
    if not glob.glob(searchbase):
        raise ValueError("No channel-level directory found at %s. Either "
                         "the channel name or ETG names are wrong, or this "
                         "channel is not configured for this ETG."
                         % searchbase)

    # perform and cache results
    out = Cache()
    append = out.append
    for gpsdir in gpsdirs:
        gpssearchpath = os.path.join(searchbase, str(gpsdir), trigform)
        if verbose:
            gprint("Searching %s..." % os.path.split(gpssearchpath)[0],
                   end=' ')
        found = set(map(os.path.realpath, glob.glob(gpssearchpath)))
        n = 0
        for f in found:
            ce = CacheEntry.from_T050017(f)
            if ce.segment.intersects(span):
                append(ce)
                n += 1
        if verbose:
            gprint("%d found" % n)
    out.sort(key=lambda e: e.path)
    return out
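# -- usage sketch (illustrative) ----------------------------------------------
# A hedged example of calling find_trigger_urls(); the channel name and GPS
# span are placeholders, and results depend on the archive under
# TRIGFIND_BASE_PATH being populated for that channel/ETG.
def _example_find_trigger_urls():
    cache = find_trigger_urls('L1:GDS-CALIB_STRAIN', 'omicron',
                              1187000000, 1187100000, verbose=True)
    print('%d trigger files found' % len(cache))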
def process_state(self, state, nds='guess', multiprocess=False,
                  config=GWSummConfigParser(), segdb_error='raise',
                  trigcache=None, datacache=None):
    if trigcache is None:
        trigcache = self.inspiralcache
    if datacache is None:
        datacache = Cache()
    super(DailyAhopeTab, self).process_state(
        state, nds=nds, multiprocess=multiprocess, config=config,
        datacache=datacache, trigcache=trigcache)
def find_contiguous(*caches):
    """Separate one or more caches into sets of contiguous caches

    Parameters
    ----------
    *caches
        one or more :class:`~glue.lal.Cache` objects

    Returns
    -------
    caches : `iter` of :class:`~glue.lal.Cache`
        an iterable yielding each contiguous cache
    """
    try:
        flat = flatten(*caches)
    except IndexError:
        flat = Cache()
    for segment in cache_segments(flat):
        yield flat.sieve(segment=segment)
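# A hedged usage sketch of find_contiguous(): the hypothetical file names
# below describe two contiguous 10 s frames and one detached frame, which
# should yield two sub-caches.
def _example_find_contiguous():
    cache = Cache(map(CacheEntry.from_T050017,
                      ['X-TEST-0-10.gwf', 'X-TEST-10-10.gwf',
                       'X-TEST-40-10.gwf']))
    for subcache in find_contiguous(cache):
        print(subcache.to_segmentlistdict())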
def find_trigger_files(channel, etg, segments, **kwargs):
    """Find trigger files for a given channel and ETG

    Parameters
    ----------
    channel : `str`
        name of channel to find

    etg : `str`
        name of event trigger generator to find

    segments : :class:`~glue.segments.segmentlist`
        list of segments to find

    **kwargs
        all other keyword arguments are passed to
        `trigfind.find_trigger_urls`

    Returns
    -------
    cache : :class:`~glue.lal.Cache`
        cache of trigger file paths

    See Also
    --------
    trigfind.find_trigger_urls
        for details on file discovery
    """
    cache = Cache()
    for start, end in segments:
        try:
            cache.extend(trigfind.find_trigger_urls(channel, etg, start,
                                                    end, **kwargs))
        except ValueError as e:
            if str(e).lower().startswith('no channel-level directory'):
                warnings.warn(str(e))
            else:
                raise
    return cache.unique()
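# -- usage sketch (illustrative) ----------------------------------------------
# A hedged example of the segment-wise search above; the channel and segment
# values are placeholders, and Segment/SegmentList are assumed importable as
# elsewhere in this codebase.
def _example_find_trigger_files():
    segs = SegmentList([Segment(1187000000, 1187001000),
                        Segment(1187002000, 1187003000)])
    return find_trigger_files('L1:GDS-CALIB_STRAIN', 'omicron', segs)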
def _get_timeseries_dict(channels, segments, config=None, cache=None,
                         query=True, nds=None, frametype=None,
                         multiprocess=True, return_=True, statevector=False,
                         archive=True, datafind_error='raise', **ioargs):
    """Internal method to retrieve the data for a set of like-typed
    channels using the :meth:`TimeSeriesDict.read` accessor.
    """
    channels = map(get_channel, channels)

    # set classes
    if statevector:
        ListClass = StateVectorList
        DictClass = StateVectorDict
    else:
        ListClass = TimeSeriesList
        DictClass = TimeSeriesDict

    # check we have a configparser
    if config is None:
        config = GWSummConfigParser()

    # read segments from global memory
    keys = dict((c.ndsname, make_globalv_key(c)) for c in channels)
    havesegs = reduce(
        operator.and_,
        (globalv.DATA.get(keys[channel.ndsname], ListClass()).segments
         for channel in channels))
    new = segments - havesegs

    # get processes
    if multiprocess is True:
        nproc = count_free_cores()
    elif multiprocess is False:
        nproc = 1
    else:
        nproc = multiprocess

    # read channel information
    filter_ = dict()
    resample = dict()
    dtype_ = dict()
    for channel in channels:
        try:
            filter_[channel.ndsname] = channel.filter
        except AttributeError:
            pass
        try:
            resample[channel] = float(channel.resample)
        except AttributeError:
            pass
        if channel.dtype is None:
            dtype_[channel] = ioargs.get('dtype')
        else:
            dtype_[channel] = channel.dtype

    # work out whether to use NDS or not
    if nds is None and cache is not None:
        nds = False
    elif nds is None:
        nds = 'LIGO_DATAFIND_SERVER' not in os.environ

    # read new data
    query &= (abs(new) > 0)
    if cache is not None:
        query &= len(cache) > 0
    if query:
        for channel in channels:
            globalv.DATA.setdefault(keys[channel.ndsname], ListClass())
        # open NDS connection
        if nds and config.has_option('nds', 'host'):
            host = config.get('nds', 'host')
            port = config.getint('nds', 'port')
            try:
                ndsconnection = nds2.connection(host, port)
            except RuntimeError as e:
                if 'SASL authentication' in str(e):
                    from gwpy.io.nds import kinit
                    kinit()
                    ndsconnection = nds2.connection(host, port)
                else:
                    raise
            frametype = source = 'nds'
            ndstype = channels[0].type
        elif nds:
            ndsconnection = None
            frametype = source = 'nds'
            ndstype = channels[0].type
        # or find frame type and check cache
        else:
            ifo = channels[0].ifo
            frametype = frametype or channels[0].frametype
            if frametype is not None and frametype.endswith('%s_M' % ifo):
                new = type(new)([s for s in new if abs(s) >= 60.])
            elif frametype is not None and frametype.endswith('%s_T' % ifo):
                new = type(new)([s for s in new if abs(s) >= 1.])
            if cache is not None:
                fcache = cache.sieve(ifos=ifo[0], description=frametype,
                                     exact_match=True)
            else:
                fcache = Cache()
            if (cache is None or len(fcache) == 0) and len(new):
                span = new.extent().protract(8)
                fcache = find_frames(ifo, frametype, span[0], span[1],
                                     config=config, gaps='ignore',
                                     onerror=datafind_error)
                cachesegments = find_cache_segments(fcache)
                gaps = SegmentList([span]) - cachesegments
                if abs(gaps) and frametype == '%s_HOFT_C00' % ifo:
                    f2 = '%s_DMT_C00' % ifo
                    vprint("    Gaps discovered in aggregated h(t) type "
                           "%s, checking %s\n" % (frametype, f2))
                    c2 = find_frames(ifo, f2, span[0], span[1],
                                     config=config, gaps='ignore',
                                     onerror=datafind_error)
                    g2 = SegmentList([span]) - find_cache_segments(c2)
                    if abs(g2) < abs(gaps):
                        vprint("    Greater coverage with frametype %s\n"
                               % f2)
                        fcache = c2
                        frametype = f2
                    else:
                        vprint("    No extra coverage with frametype %s\n"
                               % f2)
            # parse discontiguous cache blocks and rebuild segment list
            cachesegments = find_cache_segments(fcache)
            new &= cachesegments
            source = 'frames'
            # set ctype if reading with framecpp
            if cache is None and frametype in ADC_TYPES and HAS_FRAMECPP:
                ioargs['type'] = 'adc'
            for channel in channels:
                channel.frametype = frametype

        # check whether each channel exists for all new times already
        qchannels = []
        qresample = {}
        qdtype = {}
        for channel in channels:
            oldsegs = globalv.DATA.get(channel.ndsname,
                                       ListClass()).segments
            if abs(new - oldsegs) != 0:
                qchannels.append(channel)
                if channel in resample:
                    qresample[channel] = resample[channel]
                qdtype[channel] = dtype_.get(channel, ioargs.get('dtype'))
        ioargs['dtype'] = qdtype

        # loop through segments, recording data for each
        if len(new) and nproc > 1:
            vprint("    Fetching data (from %s) for %d channels [%s]"
                   % (source, len(qchannels), nds and ndstype or frametype))
        for segment in new:
            # force reading integer-precision segments
            segment = type(segment)(int(segment[0]), int(segment[1]))
            if abs(segment) < 1:
                continue
            if nds:
                tsd = DictClass.fetch(qchannels, segment[0], segment[1],
                                      connection=ndsconnection,
                                      type=ndstype, **ioargs)
            else:
                # pad resampling
                if segment[1] == cachesegments[-1][1] and qresample:
                    resamplepad = 8
                    if abs(segment) <= resamplepad:
                        continue
                    segment = type(segment)(segment[0],
                                            segment[1] - resamplepad)
                    segcache = fcache.sieve(
                        segment=segment.protract(resamplepad))
                else:
                    segcache = fcache.sieve(segment=segment)
                # set minute trend times modulo 60 from GPS 0
                if (re.match('(?:(.*)_)?[A-Z]\d_M', str(frametype)) or
                        (ifo == 'C1' and frametype == 'M')):
                    segstart = int(segment[0]) // 60 * 60
                    segend = int(segment[1]) // 60 * 60
                    if segend >= segment[1]:
                        segend -= 60
                    # and ignore segments shorter than 1 full average
                    if (segend - segstart) < 60:
                        continue
                    segcache = segcache.sieve(
                        segment=type(segment)(segstart, segend))
                else:
                    segstart, segend = map(float, segment)
                # pull filters out because they can break multiprocessing
                if nproc > 1:
                    for c in qchannels:
                        if c.ndsname in filter_:
                            del c.filter
                # read data
                tsd = DictClass.read(segcache, qchannels, start=segstart,
                                     end=segend, nproc=nproc,
                                     resample=qresample, **ioargs)
                # put filters back
                for c in qchannels:
                    if c.ndsname in filter_:
                        c.filter = filter_[c.ndsname]
            for (channel, data) in tsd.iteritems():
                key = keys[channel.ndsname]
                if (key in globalv.DATA and
                        data.span in globalv.DATA[key].segments):
                    continue
                if data.unit is None:
                    data.unit = 'undef'
                for seg in globalv.DATA[key].segments:
                    if seg.intersects(data.span):
                        data = data.crop(*(data.span - seg))
                        break
                try:
                    filt = filter_[channel.ndsname]
                except KeyError:
                    pass
                else:
                    # filter with function
                    if callable(filt):
                        try:
                            data = filt(data)
                        except TypeError as e:
                            if 'Can only apply' in str(e):
                                data.value[:] = filt(data.value)
                            else:
                                raise
                    # filter with gain
                    elif (isinstance(filt, tuple) and len(filt) == 3 and
                            len(filt[0] + filt[1]) == 0):
                        try:
                            data *= filt[2]
                        except TypeError:
                            data = data * filt[2]
                    # filter zpk
                    elif isinstance(filt, tuple):
                        data = data.filter(*filt)
                    # filter fail
                    else:
                        raise ValueError("Cannot parse filter for %s: %r"
                                         % (channel.ndsname, filt))
                if isinstance(data, StateVector) or ':GRD-' in str(channel):
                    try:
                        data.unit = units.dimensionless_unscaled
                    except AttributeError:
                        data._unit = units.dimensionless_unscaled
                    if hasattr(channel, 'bits'):
                        data.bits = channel.bits
                elif data.unit is None:
                    data._unit = channel.unit
                # XXX: HACK for failing unit check
                if len(globalv.DATA[key]):
                    data._unit = globalv.DATA[key][-1].unit
                # update channel type for trends
                if (data.channel.type is None and
                        data.channel.trend is not None):
                    if data.dt.to('s').value == 1:
                        data.channel.type = 's-trend'
                    elif data.dt.to('s').value == 60:
                        data.channel.type = 'm-trend'
                # append and coalesce
                add_timeseries(data, key=key, coalesce=True)
            if multiprocess:
                vprint('.')
        if len(new):
            vprint("\n")

    if not return_:
        return

    # return correct data
    out = OrderedDict()
    for channel in channels:
        data = ListClass()
        if keys[channel.ndsname] not in globalv.DATA:
            out[channel.ndsname] = ListClass()
        else:
            for ts in globalv.DATA[keys[channel.ndsname]]:
                for seg in segments:
                    if abs(seg) == 0 or abs(seg) < ts.dt.value:
                        continue
                    if ts.span.intersects(seg):
                        common = map(float, ts.span & seg)
                        cropped = ts.crop(*common, copy=False)
                        if cropped.size:
                            data.append(cropped)
        out[channel.ndsname] = data.coalesce()
    return out
def find_frames(ifo, frametype, gpsstart, gpsend,
                config=GWSummConfigParser(), urltype='file', gaps='warn',
                onerror='raise'):
    """Query the datafind server for GWF files for the given type

    Parameters
    ----------
    ifo : `str`
        prefix for the IFO of interest (either one or two characters)

    frametype : `str`
        name of the frametype to find

    gpsstart : `int`
        GPS start time of the query

    gpsend : `int`
        GPS end time of the query

    config : `~ConfigParser.ConfigParser`, optional
        configuration with `[datafind]` section containing `server`
        specification, otherwise taken from the environment

    urltype : `str`, optional
        what type of file paths to return, default: `file`

    gaps : `str`, optional
        what to do when gaps are detected, one of

        - `ignore` : do nothing
        - `warn` : display the existence of gaps but carry on
        - `raise` : raise an exception

    onerror : `str`, optional
        what to do when the `~glue.datafind` query itself fails, same
        options as for ``gaps``

    Returns
    -------
    cache : `~glue.lal.Cache`
        a list of structured frame file descriptions matching the ifo and
        frametype requested
    """
    vprint('    Finding %s-%s frames for [%d, %d)...'
           % (ifo[0], frametype, int(gpsstart), int(gpsend)))
    # find datafind host:port
    try:
        host = config.get('datafind', 'server')
    except (NoOptionError, NoSectionError):
        try:
            host = os.environ['LIGO_DATAFIND_SERVER']
        except KeyError:
            host = None
            port = None
        else:
            try:
                host, port = host.rsplit(':', 1)
            except ValueError:
                port = None
            else:
                port = int(port)
    else:
        port = config.getint('datafind', 'port')
    # get credentials
    if port == 80:
        cert = None
        key = None
    else:
        cert, key = datafind.find_credential()

    # XXX HACK: LLO changed frame types on Dec 6 2013:
    LLOCHANGE = 1070291904
    if re.match('L1_{CRMT}', frametype) and gpsstart < LLOCHANGE:
        frametype = frametype[-1]

    # query frames
    ifo = ifo[0].upper()
    gpsstart = int(floor(gpsstart))
    gpsend = int(ceil(min(globalv.NOW, gpsend)))
    if gpsend <= gpsstart:
        return Cache()

    # parse match
    try:
        frametype, match = frametype.split('|', 1)
    except ValueError:
        match = None

    # open datafind connection (created once, so that the LLO special
    # case below can reuse it)
    if cert is not None:
        dfconn = datafind.GWDataFindHTTPSConnection(
            host=host, port=port, cert_file=cert, key_file=key)
    else:
        dfconn = datafind.GWDataFindHTTPConnection(host=host, port=port)

    def _query():
        return dfconn.find_frame_urls(ifo[0].upper(), frametype, gpsstart,
                                      gpsend, urltype=urltype, on_gaps=gaps,
                                      match=match)

    try:
        cache = _query()
    except RuntimeError as e:
        sleep(1)
        try:
            cache = _query()
        except RuntimeError:
            if 'Invalid GPS times' in str(e):
                e.args = ('%s: %d ... %s' % (str(e), gpsstart, gpsend),)
            if onerror in ['ignore', None]:
                pass
            elif onerror in ['warn']:
                warnings.warn('Caught %s: %s' % (type(e).__name__, str(e)))
            else:
                raise
            cache = Cache()

    # XXX: if querying for day of LLO frame type change, do both
    if (ifo[0].upper() == 'L' and frametype in ['C', 'R', 'M', 'T'] and
            gpsstart < LLOCHANGE < gpsend):
        start = len(cache) and cache[-1].segment[1] or gpsstart
        if start < gpsend:
            cache.extend(dfconn.find_frame_urls(
                ifo[0].upper(), 'L1_%s' % frametype, start, gpsend,
                urltype=urltype, on_gaps=gaps)[1:])

    # extend cache beyond datafind's knowledge to reduce latency
    try:
        latest = cache[-1]
        ngps = len(re_gwf_gps_epoch.search(
            os.path.dirname(latest.path)).groupdict()['gpsepoch'])
    except (IndexError, AttributeError):
        pass
    else:
        while True:
            s, e = latest.segment
            if s >= gpsend:
                break
            # replace GPS time of file basename
            new = latest.path.replace('-%d-' % s, '-%d-' % e)
            # replace GPS epoch in dirname
            new = new.replace('%s/' % str(s)[:ngps], '%s/' % str(e)[:ngps])
            if os.path.isfile(new):
                latest = CacheEntry.from_T050017(new)
                cache.append(latest)
            else:
                break

    # validate files existing and return
    cache, _ = cache.checkfilesexist()
    vprint(' %d found.\n' % len(cache))
    return cache
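# -- usage sketch (illustrative) ----------------------------------------------
# A hedged example of a datafind query via find_frames(); the frametype and
# GPS span are placeholders, and a reachable datafind server (or the
# LIGO_DATAFIND_SERVER environment variable) is assumed.
def _example_find_frames():
    cache = find_frames('L1', 'L1_HOFT_C00', 1187000000, 1187001000,
                        gaps='ignore', onerror='warn')
    print('%d frame files found' % len(cache))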
def find_dmt_cache(start, end, ifo, extension="xml", check_files=False, **kwargs): """Find DMT KW files for the given GPS period. @param start GPS start time for search @param end GPS end time for search @param ifo observatory for search @param extension UNDOCUMENTED @param check_files check that the returned files can be read on disk, default False @param kwargs UNDOCUMENTED """ out = Cache() # verify host host = {'G1': 'atlas', 'H1': 'ligo-wa', 'H2': 'ligo-wa', 'L1': 'ligo-la'} if (not kwargs.has_key('directory') and not re.search(host[ifo], getfqdn())): sys.stderr.write("WARNING: KW online files are not available for " "IFO=%s on this host." % ifo) sys.stderr.flush() return out span = segments.segment(start, end) # set known epochs known_epochs = [1026263104] # get parameters dt = int(kwargs.pop("duration", 64)) epoch = kwargs.pop("epoch", known_epochs) filetag = kwargs.pop("filetag", "KW_TRIGGERS") dirtag = filetag.endswith("_TRENDS") and filetag[:-7] or filetag try: iter(epoch) except TypeError: epoch = [int(epoch)] overlap = int(kwargs.pop("overlap", 0)) directory = kwargs.pop( "duration", "/gds-%s/dmt/triggers/%s-%s" % (ifo.lower(), ifo[0].upper(), dirtag)) # optimise append = out.append splitext = os.path.splitext isfile = os.path.isfile intersects = span.intersects segment = segments.segment from_T050017 = CacheEntry.from_T050017 # get times epoch_idx = bisect.bisect_right(epoch, start) - 1 next_epoch = len(epoch) >= epoch_idx + 2 and epoch[epoch_idx + 1] or 0 start_time = int(start - numpy.mod(start - epoch[epoch_idx], dt - overlap)) t = start_time def _kw_file(gps, ifo): return ("%s/%s-%s-%.5s/" "%s-%s-%.10d-%d.%s" % (directory, ifo.upper()[0], dirtag, gps, ifo.upper()[0], filetag, gps, dt, extension)) # loop over time segments constructing file paths while t < end: fp = _kw_file(t, ifo) if (intersects(segment(t, t + dt)) and (not check_files or isfile(fp))): append(from_T050017(fp)) t += dt - overlap if next_epoch and t > next_epoch: t = next_epoch epoch_idx += 1 next_epoch = len(epoch) >= epoch_idx + 2 and epoch[epoch_idx + 1] or 0 out.sort(key=lambda e: e.path) return out
def find_online_cache(start, end, ifo, mask='DOWNSELECT',
                      check_files=False, **kwargs):
    """Find KW Online files for the given GPS period.

    @param start
        GPS start time for search
    @param end
        GPS end time for search
    @param ifo
        observatory for search
    @param mask
        description tag of KW ASCII to search
    @param check_files
        check that the returned files can be read on disk, default False
    @param kwargs UNDOCUMENTED
    """
    out = Cache()

    # verify host
    host = {'G1': 'atlas', 'H1': 'ligo-wa', 'H2': 'ligo-wa',
            'L1': 'ligo-la'}
    if (not kwargs.has_key('directory') and
            not re.search(host[ifo], getfqdn())):
        sys.stderr.write("WARNING: KW online files are not available for "
                         "IFO=%s on this host." % ifo)
        sys.stderr.flush()
        return out

    span = segments.segment(start, end)

    # get parameters
    dt = kwargs.pop("duration", 64)
    overlap = kwargs.pop("overlap", 8)
    if ifo == "G1":
        directory = kwargs.pop("directory",
                               "/home/omega/online/G1/segments")
        epoch = kwargs.pop("epoch", 983669456)
    else:
        directory = kwargs.pop(
            "directory",
            "/home/omega/online/%s/archive/S6/segments" % ifo)
        epoch = kwargs.pop("epoch", 931211808)

    # optimise
    append = out.append
    splitext = os.path.splitext
    isfile = os.path.isfile
    intersects = span.intersects
    segment = segments.segment
    from_T050017 = CacheEntry.from_T050017

    # get times
    start_time = int(start - numpy.mod(start - epoch, dt - overlap))
    t = start_time

    if ifo == "G1":
        def _omega_file(gps, ifo):
            return ("%s/%.5d/%.10d-%.10d/%s-KW_TRIGGERS_%s-%.10d-%d.txt"
                    % (directory, gps / 100000, gps, gps + dt, ifo, mask,
                       gps, dt))
    else:
        def _omega_file(gps, ifo):
            return ("%s/%s-%s/%s-KW_TRIGGERS_%s-%s-%s.txt"
                    % (directory, gps, gps + dt, ifo, mask, gps, dt))

    # loop over time segments constructing file paths
    while t < end:
        fp = _omega_file(t, ifo)
        if (intersects(segment(t, t + dt)) and
                (not check_files or isfile(fp))):
            append(from_T050017(fp))
        t += dt - overlap
    out.sort(key=lambda e: e.path)
    return out
def get_cache(start, end, ifo, channel, mask='DOWNSELECT',
              checkfilesexist=False, **kwargs):
    """Returns a glue.lal.Cache containing CacheEntries for all omega
    online trigger files between the given start and end time for the
    given ifo.
    """
    cache = Cache()

    # verify host
    host = {'G1': 'atlas', 'H1': 'ligo-wa', 'H2': 'ligo-wa',
            'L1': 'ligo-la'}
    if (not kwargs.has_key('directory') and
            not re.search(host[ifo], getfqdn())):
        sys.stderr.write("warning: Omega online files are not available "
                         "for IFO=%s on this host." % ifo)
        sys.stderr.flush()
        return cache

    span = segments.segment(start, end)
    if ifo == 'G1':
        if channel:
            kwargs.setdefault(
                'directory',
                '/home/omega/online/%s/segments'
                % channel.replace(':', '_'))
        else:
            kwargs.setdefault('directory',
                              '/home/omega/online/G1/segments')
        kwargs.setdefault('epoch', 0)
    else:
        kwargs.setdefault('directory',
                          '/home/omega/online/%s/archive/S6/segments'
                          % ifo)
        kwargs.setdefault('epoch', 931211808)
    kwargs.setdefault('duration', 64)
    kwargs.setdefault('overlap', 8)

    # optimise
    append = cache.append
    splitext = os.path.splitext
    isfile = os.path.isfile
    intersects = span.intersects
    segment = segments.segment
    from_T050017 = CacheEntry.from_T050017
    basedir = kwargs['directory']
    basetime = kwargs['epoch']
    triglength = kwargs['duration']
    overlap = kwargs['overlap']

    # get times
    start_time = int(start - numpy.mod(start - basetime,
                                       triglength - overlap))
    t = start_time

    # loop over time segments constructing file paths and appending
    # to the cache
    while t < end:
        if ifo == 'G1':
            trigfile = ('%s/%.5d/%.10d-%.10d/'
                        '%s-OMEGA_TRIGGERS_%s-%.10d-%d.txt'
                        % (basedir, t / 100000, t, t + triglength, ifo,
                           mask, t, triglength))
        else:
            trigfile = ('%s/%.10d-%.10d/%s-OMEGA_TRIGGERS_%s-%.10d-%d.txt'
                        % (basedir, t, t + triglength, ifo, mask, t,
                           triglength))
        if (intersects(segment(t, t + triglength)) and
                (not checkfilesexist or isfile(trigfile))):
            append(from_T050017(trigfile))
        t += triglength - overlap
    cache.sort(key=lambda e: e.path)
    return cache
def main(args=None):
    """Run the GWSumm command-line interface
    """
    parser = create_parser()
    args = parser.parse_args(args=args)
    if args.debug:
        warnings.simplefilter('error', DeprecationWarning)

    # set verbose output options
    globalv.VERBOSE = args.verbose

    # find all config files
    args.config_file = [
        os.path.expanduser(fp) for csv in args.config_file
        for fp in csv.split(',')
    ]

    # check segdb option
    if args.on_segdb_error not in ['raise', 'warn', 'ignore']:
        parser.error("Invalid option --on-segdb-error='%s'"
                     % args.on_segdb_error)

    # read configuration file
    config = GWSummConfigParser()
    config.optionxform = str
    if args.ifo:
        config.set_ifo_options(args.ifo, section=DEFAULTSECT)
    config.set(DEFAULTSECT, 'user', getpass.getuser())
    config.read(args.config_file)

    try:
        ifo = config.get(DEFAULTSECT, 'IFO')
    except NoOptionError:
        ifo = None
    finally:
        globalv.IFO = ifo

    # interpolate section names
    interp = {}
    if ifo:
        interp['ifo'] = ifo.lower()
        interp['IFO'] = ifo.title()
    config.interpolate_section_names(**interp)

    # double-check week mode matches calendar setting
    if args.mode == 'week':
        if config.has_option("calendar", "start-of-week"):
            weekday = getattr(calendar,
                              config.get("calendar",
                                         "start-of-week").upper())
            if weekday != args.week.timetuple().tm_wday:
                msg = ("Cannot process week starting on %s. The "
                       "'start-of-week' option in the [calendar] section "
                       "of the INI file specifies weeks start on %ss."
                       % (args.week.strftime('%Y%m%d'),
                          config.get("calendar", "start-of-week")))
                parser.error(msg)

    # record times in ConfigParser
    config.set_date_options(args.gpsstart, args.gpsend,
                            section=DEFAULTSECT)

    # convert times for convenience
    span = Segment(args.gpsstart, args.gpsend)
    utc = tconvert(args.gpsstart)
    starttime = Time(float(args.gpsstart), format='gps')
    endtime = Time(float(args.gpsend), format='gps')

    # set mode and output directory
    mode.set_mode(args.mode)
    try:
        path = mode.get_base(utc)
    except ValueError:
        path = os.path.join('%d-%d' % (args.gpsstart, args.gpsend))

    # set LAL FFT plan wisdom level
    duration = min(globalv.NOW, args.gpsend) - args.gpsstart
    if duration > 200000:
        fft_lal.LAL_FFTPLAN_LEVEL = 3
    elif duration > 40000:
        fft_lal.LAL_FFTPLAN_LEVEL = 2
    else:
        fft_lal.LAL_FFTPLAN_LEVEL = 1

    # set global html only flag
    if args.html_only:
        globalv.HTMLONLY = True

    # build directories
    mkdir(args.output_dir)
    os.chdir(args.output_dir)
    plotdir = os.path.join(path, 'plots')
    mkdir(plotdir)

    # -- setup --------------------------------------

    LOGGER.info(" -- GW interferometer summary information system -- ")
    LOGGER.debug("This is process {}".format(os.getpid()))
    LOGGER.debug("You have selected {} mode".format(mode.get_mode().name))
    LOGGER.debug("Start time: {0} ({1})".format(starttime.utc.iso,
                                                starttime.gps))
    LOGGER.debug("End time: {0} ({1})".format(endtime.utc.iso,
                                              endtime.gps))
    LOGGER.debug("Output directory: {}".format(
        os.path.abspath(os.path.join(args.output_dir, path))))

    # -- Finalise configuration
    LOGGER.info("Loading configuration")
    plugins = config.load_plugins()
    if plugins:
        LOGGER.debug(" -- Loaded {} plugins:".format(len(plugins)))
        for mod in plugins:
            LOGGER.debug("        %s" % mod)
    units = config.load_units()
    LOGGER.debug("    Loaded %d units" % len(units))
    channels = config.load_channels()
    LOGGER.debug("    Loaded %d channels" % len(channels))
    states = config.load_states()
    LOGGER.debug("    Loaded %d states" % len(states))
    rcp = config.load_rcParams()
    LOGGER.debug("    Loaded %d rcParams" % len(rcp))

    # read list of tabs
    tablist = TabList.from_ini(config, match=args.process_tab,
                               path=path, plotdir=plotdir)
    tablist.sort(reverse=True)
    tabs = sorted(tablist.get_hierarchy(), key=tablist._sortkey)
    LOGGER.info("    Loaded %d tabs [%d parents overall]"
                % (len(tablist), len(tabs)))

    # read caches
    cache = {}
    for (key, var) in zip(['datacache', 'trigcache', 'segmentcache'],
                          [args.data_cache, args.event_cache,
                           args.segment_cache]):
        if var:
            LOGGER.info("Reading %s from %d files... " % (key, len(var)))
            cache[key] = Cache()
            for fp in var:
                with open(fp, 'r') as f:
                    cache[key].extend(Cache.fromfile(f))
            cache[key] = cache[key].sieve(segment=span)
            LOGGER.debug("Done [%d entries]" % len(cache[key]))

    # -- read archive -------------------------------

    if not hasattr(args, 'archive'):
        args.archive = False

    if args.html_only:
        args.archive = False
        args.daily_archive = False
    elif args.archive is True:
        args.archive = 'GW_SUMMARY_ARCHIVE'

    archives = []

    if args.archive:
        archivedir = os.path.join(path, 'archive')
        mkdir(archivedir)
        args.archive = os.path.join(
            archivedir, '%s-%s-%d-%d.h5'
            % (ifo, args.archive, args.gpsstart,
               args.gpsend - args.gpsstart))
        if os.path.isfile(args.archive):
            archives.append(args.archive)
        else:
            LOGGER.debug(
                "No archive found in %s, one will be created at the end"
                % args.archive)

    # read daily archive for week/month/... mode
    if hasattr(args, 'daily_archive') and args.daily_archive:
        # find daily archive files
        archives.extend(archive.find_daily_archives(
            args.gpsstart, args.gpsend, ifo, args.daily_archive,
            archivedir))
        # then don't read any actual data
        cache['datacache'] = Cache()

    for arch in archives:
        LOGGER.info("Reading archived data from %s" % arch)
        archive.read_data_archive(arch)
        LOGGER.debug("Archive data loaded")

    # -- read HTML configuration --------------------

    css = config.get_css(section='html')
    javascript = config.get_javascript(section='html')

    # enable comments
    try:
        globalv.HTML_COMMENTS_NAME = config.get('html',
                                                'disqus-shortname')
    except (NoOptionError, NoSectionError):
        pass

    # find new ifo bases
    ifobases = {}
    try:
        bases_ = config.nditems('html')
    except NoSectionError:
        pass
    else:
        base_reg = re.compile(r'-base\Z')
        for key, val in bases_:
            if base_reg.search(key):
                ifobases[key.rsplit('-', 1)[0]] = val
    ifobases = OrderedDict(sorted(ifobases.items(), key=lambda x: x[0]))

    # -- write auxiliary pages ----------------------

    # get URL from output directory
    if 'public_html' in os.getcwd():
        urlbase = os.path.sep + os.path.join(
            '~%s' % config.get(DEFAULTSECT, 'user'),
            os.getcwd().split('public_html', 1)[1][1:])
        base = urlbase
    # otherwise get URL from html config
    elif ifo in ifobases:
        urlbase = urlparse(ifobases[ifo]).path
        base = urlbase
    # otherwise let the write_html processor work it out on-the-fly
    else:
        urlbase = None
        base = None

    # get link to issues report page
    try:
        issues = config.get('html', 'issues')
    except KeyError:
        issues = True

    # write 404 error page
    if not args.no_htaccess and not args.no_html and urlbase:
        top = os.path.join(urlbase, path)
        four0four = get_tab('404')(span=span, parent=None, path=path,
                                   index=os.path.join(path, '404.html'))
        four0four.write_html(css=css, js=javascript, tabs=tabs, ifo=ifo,
                             ifomap=ifobases, top=top, base=base,
                             writedata=not args.html_only,
                             writehtml=not args.no_html,
                             issues=issues)
        url404 = os.path.join(urlbase, four0four.index)
        with open(os.path.join(path, '.htaccess'), 'w') as htaccess:
            print('Options -Indexes', file=htaccess)
            print('ErrorDocument 404 %s' % url404, file=htaccess)
            print('ErrorDocument 403 %s' % url404, file=htaccess)

    # write config page
    about = get_tab('about')(span=span, parent=None, path=path)
    if not args.no_html:
        mkdir(about.path)
        about.write_html(
            css=css, js=javascript, tabs=tabs, config=config.files,
            prog=PROG, ifo=ifo, ifomap=ifobases, about=about.index,
            base=base, issues=issues, writedata=not args.html_only,
            writehtml=not args.no_html)

    # -- read bulk data -----------------------------

    # XXX: bulk data reading could optimise things
    # XXX: but has never been used, so should remove (DMM 18/01/16)
    if args.bulk_read and not args.html_only:
        LOGGER.info("Reading all data in BULK")
        allts = set()
        allsv = set()
        allflags = set()
        for tab in tablist:
            snames = []
            for state in tab.states:
                snames.append(state.name)
                if state.definition:
                    allflags.update(re_flagdiv.split(state.definition))
            # get all data defined for the 'all' state
            if ALLSTATE in snames:
                allts.update(tab.get_channels('timeseries', 'spectrogram',
                                              'spectrum', 'histogram'))
                allsv.update(tab.get_channels('statevector'))
                allflags.update(tab.get_flags('segments'))
            # or get data for plots defined over all states
            else:
                for plot in tab.plots:
                    if plot.state is not None:
                        continue
                    if plot.type in ['timeseries', 'spectrogram',
                                     'spectrum', 'histogram']:
                        allts.update(plot.channels)
                    elif plot.type in ['statevector']:
                        allsv.update(plot.channels)
                    elif plot.type in ['segments']:
                        allflags.update([
                            f for cflag in plot.flags
                            for f in re_flagdiv.split(cflag)[::2] if f
                        ])
        allseg = SegmentList([span])
        if len(allflags):
            LOGGER.info(
                "%d data-quality flags identified for segment query "
                "from all tabs" % len(allflags))
            get_segments(allflags, allseg, config=config, return_=False)
        if len(allts):
            LOGGER.info("%d channels identified for TimeSeries from all "
                        "tabs" % len(allts))
            get_timeseries_dict(allts, allseg, config=config,
                                nds=args.nds, nproc=args.multiprocess,
                                return_=False)
        if len(allsv):
            LOGGER.info("%d channels identified for StateVector from all "
                        "tabs" % len(allsv))
            get_timeseries_dict(allsv, allseg, config=config,
                                nds=args.nds, statevector=True,
                                nproc=args.multiprocess, return_=False)

    # -- process all tabs ---------------------------

    # TODO: consider re-working this loop as TabList.process_all

    for tab in tablist:
        if tab.parent:
            name = '%s/%s' % (tab.parent.name, tab.name)
        else:
            name = tab.name
        if not args.html_only and isinstance(tab,
                                             get_tab('_processed')):
            LOGGER.debug("Processing %s" % name)
            tab.process(config=config, nds=args.nds,
                        nproc=args.multiprocess,
                        segdb_error=args.on_segdb_error,
                        datafind_error=args.on_datafind_error, **cache)
        if not tab.hidden and not isinstance(tab, get_tab('link')):
            mkdir(tab.href)
            tab.write_html(
                css=css, js=javascript, tabs=tabs, ifo=ifo,
                ifomap=ifobases, about=about.index, base=base,
                issues=issues, writedata=not args.html_only,
                writehtml=not args.no_html)
        # archive this tab
        if args.archive:
            LOGGER.info("Writing data to archive")
            archive.write_data_archive(args.archive)
            LOGGER.debug("Archive written to {}".format(
                os.path.abspath(args.archive)))
        LOGGER.debug("%s complete" % (name,))

    LOGGER.info("-- Data products written, all done --")
def process(self, nds='guess', multiprocess=True,
            config=GWSummConfigParser(), datacache=None,
            segmentcache=Cache(), datafind_error='raise', **kwargs):
    """Process data for the given state.
    """
    ifo = self.ifo

    for p in self.plots:
        if p.outputfile in globalv.WRITTEN_PLOTS:
            p.new = False

    # ------------------------------------------------------------------
    # work out which channels are needed

    prefix = '%s:GRD-%s_%%s' % (self.ifo, self.node)
    state = sorted(self.states, key=lambda s: abs(s.active))[0]

    try:
        version = get_timeseries(
            prefix % 'VERSION', state, config=config, nds=nds,
            multiprocess=multiprocess, cache=datacache,
            datafind_error=datafind_error,
        ).join(gap='ignore').min().value
    except ValueError:
        version = 1201

    prefices = ['STATE_N', 'REQUEST_N', 'NOMINAL_N', 'OK', 'MODE']
    if version >= 1200:
        prefices.append('OP')
    alldata = get_timeseries_dict(
        [prefix % x for x in prefices], state, config=config, nds=nds,
        multiprocess=multiprocess, cache=datacache,
        datafind_error=datafind_error, dtype='int16').values()
    vprint("    All time-series data loaded\n")

    # ------------------------------------------------------------------
    # find segments and transitions

    self.transitions = dict((v, []) for v in self.grdstates)

    for sdata, rdata, ndata, okdata in zip(*alldata[:4]):
        ssegs = DataQualityDict()
        rsegs = DataQualityDict()
        nsegs = DataQualityDict()
        oksegs = (okdata == 1).to_dqflag(name='Node OK')
        for v, name in self.grdstates.iteritems():
            # get segments for state
            tag = self.segmenttag % name
            instate = sdata == v
            ssegs[tag] = instate.to_dqflag(name=name)
            transin = (numpy.diff(
                instate.astype(int)) == 1).nonzero()[0] + 1
            transout = (numpy.diff(
                instate.astype(int)) == -1).nonzero()[0] + 1
            for i, j in zip(transin, transout):
                t = sdata.times[i].value
                from_ = sdata[i - 1].value
                to_ = sdata[j].value
                self.transitions[v].append((t, from_, to_))
            # get segments for request
            tag = self.segmenttag % name + REQUESTSTUB
            instate = rdata == v
            rsegs[tag] = instate.to_dqflag(name=name)
            # get segments for nominal
            tag = self.segmenttag % name + NOMINALSTUB
            nom = ndata == v
            nsegs[tag] = nom.to_dqflag(name=name)

        globalv.SEGMENTS += ssegs
        globalv.SEGMENTS += rsegs
        globalv.SEGMENTS += nsegs
        globalv.SEGMENTS += {self.segmenttag % 'OK': oksegs}

    super(GuardianTab, self).process(
        config=config, nds=nds, multiprocess=multiprocess,
        datacache=datacache, segmentcache=segmentcache, **kwargs)
def read_cache(cache, channel, start=None, end=None, resample=None,
               gap=None, pad=None, nproc=1, format=None, **kwargs):
    """Read a `TimeSeries` from a cache of data files using
    multiprocessing.

    The inner-workings are agnostic of data-type, but can only handle a
    single data type at a time.

    Parameters
    ----------
    cache : :class:`glue.lal.Cache`, `str`
        cache of GWF frame files, or path to a LAL-format cache file
        on disk
    channel : :class:`~gwpy.detector.channel.Channel`, `str`
        data channel to read from frames
    start : `Time`, `~gwpy.time.LIGOTimeGPS`, optional
        start GPS time of desired data
    end : `Time`, `~gwpy.time.LIGOTimeGPS`, optional
        end GPS time of desired data
    resample : `float`, optional
        rate (samples per second) to resample
    format : `str`, optional
        name of data file format, e.g. ``gwf`` or ``hdf``.
    nproc : `int`, default: ``1``
        maximum number of independent frame reading processes, default
        is set to single-process file reading.
    gap : `str`, optional
        how to handle gaps in the cache, one of

        - 'ignore': do nothing, let the underlying reader method
          handle it
        - 'warn': do nothing except print a warning to the screen
        - 'raise': raise an exception upon finding a gap (default)
        - 'pad': insert a value to fill the gaps

    pad : `float`, optional
        value with which to fill gaps in the source data, only used if
        gap is not given, or `gap='pad'` is given

    Notes
    -----
    The number of independent processes spawned by this function can be
    calculated as ``min(maxprocesses, len(cache)//minprocesssize)``.

    Returns
    -------
    data : :class:`~gwpy.timeseries.TimeSeries`
        a new `TimeSeries` containing the data read from disk
    """
    from gwpy.segments import (Segment, SegmentList)
    cls = kwargs.pop('target', TimeSeries)
    # open cache from file if given
    if isinstance(cache, (unicode, str, file)):
        cache = open_cache(cache)

    # fudge empty cache
    if len(cache) == 0:
        return cls([], channel=channel, epoch=start)

    # use cache to get start end times
    cache.sort(key=lambda ce: ce.segment[0])
    if start is None:
        start = cache[0].segment[0]
    if end is None:
        end = cache[-1].segment[1]

    # get span
    span = Segment(start, end)
    if cls not in (StateVector, StateVectorDict) and resample:
        cache = cache.sieve(segment=span.protract(8))
    else:
        cache = cache.sieve(segment=span)
    cspan = Segment(cache[0].segment[0], cache[-1].segment[1])

    # check for gaps
    if gap is None and pad is not None:
        gap = 'pad'
    elif gap is None:
        gap = 'raise'
    segs = cache_segments(cache, on_missing='ignore') & SegmentList([span])
    if len(segs) != 1 and gap.lower() == 'ignore' or gap.lower() == 'pad':
        pass
    elif len(segs) != 1:
        gaps = SegmentList([cspan]) - segs
        msg = ("The cache given to %s.read has gaps in it in the "
               "following segments:\n    %s"
               % (cls.__name__, '\n    '.join(map(str, gaps))))
        if gap.lower() == 'warn':
            warnings.warn(msg)
        else:
            raise ValueError(msg)
        segs = type(segs)([span])

    # if reading a small number of channels, try to use lalframe,
    # it's faster
    if format is None and (
            isinstance(channel, str) or
            (isinstance(channel, (list, tuple)) and
             len(channel) <= MAX_LALFRAME_CHANNELS)):
        try:
            from lalframe import frread
        except ImportError:
            format = 'gwf'
        else:
            kwargs.pop('type', None)
            format = 'lalframe'
    # otherwise use the file extension as the format
    elif format is None:
        format = os.path.splitext(cache[0].path)[1][1:]

    # -- process multiple cache segments --------
    # this entry point loops this method for each segment

    if len(segs) > 1:
        out = None
        for seg in segs:
            new = read_cache(cache, channel, start=seg[0], end=seg[1],
                             resample=resample, nproc=nproc,
                             format=format, target=cls, **kwargs)
            if out is None:
                out = new
            else:
                out.append(new, gap='pad', pad=pad)
        return out

    # -- process single cache segment

    # force one frame per process minimum
    nproc = min(nproc, len(cache))

    # single-process
    if nproc <= 1:
        return cls.read(cache, channel, format=format, start=start,
                        end=end, resample=resample, **kwargs)

    # define how to read each frame
    def _read(q, pstart, pend):
        try:
            # don't go beyond the requested limits
            pstart = float(max(start, pstart))
            pend = float(min(end, pend))
            # if resampling TimeSeries, pad by 8 seconds inside cache
            # limits
            if cls not in (StateVector, StateVectorDict) and resample:
                cstart = float(max(cspan[0], pstart - 8))
                subcache = cache.sieve(segment=Segment(cstart, pend))
                out = cls.read(subcache, channel, format=format,
                               start=cstart, end=pend, resample=None,
                               **kwargs)
                out = out.resample(resample)
                q.put(out.crop(pstart, pend))
            else:
                subcache = cache.sieve(segment=Segment(pstart, pend))
                q.put(cls.read(subcache, channel, format=format,
                               start=pstart, end=pend,
                               resample=resample, **kwargs))
        except Exception as e:
            q.put(e)

    # separate cache into parts
    fperproc = int(ceil(len(cache) / nproc))
    subcaches = [Cache(cache[i:i + fperproc])
                 for i in range(0, len(cache), fperproc)]
    subsegments = SegmentList([Segment(c[0].segment[0],
                                       c[-1].segment[1])
                               for c in subcaches])

    # start all processes
    queue = ProcessQueue(nproc)
    proclist = []
    for subseg in subsegments:
        process = Process(target=_read,
                          args=(queue, subseg[0], subseg[1]))
        process.daemon = True
        proclist.append(process)
        process.start()

    # get data and block
    data = [queue.get() for p in proclist]
    for process in proclist:
        process.join()
    for result in data:
        if isinstance(result, Exception):
            raise result

    # format and return
    if issubclass(cls, dict):
        try:
            data.sort(key=lambda tsd: tsd.values()[0].epoch.gps)
        except IndexError:
            pass
        out = cls()
        while len(data):
            tsd = data.pop(0)
            out.append(tsd)
            del tsd
        return out
    else:
        if cls in (TimeSeries, TimeSeriesDict):
            out = TimeSeriesList(*data)
        else:
            out = StateVectorList(*data)
        out.sort(key=lambda ts: ts.epoch.gps)
        ts = out.join(gap=gap)
        return ts
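# -- usage sketch (illustrative) ----------------------------------------------
# A hedged example of multi-process frame reading with read_cache(); the cache
# file path and channel are placeholders. With nproc=4 the cache is split into
# contiguous sub-caches, one reader process per sub-cache.
def _example_read_cache_parallel():
    data = read_cache('frames.lcf', 'L1:GDS-CALIB_STRAIN',
                      start=1187000000, end=1187000064,
                      nproc=4, gap='pad', pad=0.0)
    print(data.span)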
def find_frames(ifo, frametype, gpsstart, gpsend, config=ConfigParser(),
                urltype='file', gaps='warn', onerror='raise'):
    """Query the datafind server for GWF files for the given type
    """
    vprint('    Finding %s-%s frames for [%d, %d)...'
           % (ifo[0], frametype, int(gpsstart), int(gpsend)))
    # find datafind host:port
    try:
        host = config.get('datafind', 'server')
    except (NoOptionError, NoSectionError):
        try:
            host = os.environ['LIGO_DATAFIND_SERVER']
        except KeyError:
            host = None
            port = None
        else:
            try:
                host, port = host.rsplit(':', 1)
            except ValueError:
                port = None
            else:
                port = int(port)
    else:
        port = config.getint('datafind', 'port')
    # get credentials
    if port == 80:
        cert = None
        key = None
    else:
        cert, key = datafind.find_credential()

    # XXX HACK: LLO changed frame types on Dec 6 2013:
    LLOCHANGE = 1070291904
    if re.match('L1_{CRMT}', frametype) and gpsstart < LLOCHANGE:
        frametype = frametype[-1]

    # query frames
    ifo = ifo[0].upper()
    gpsstart = int(floor(gpsstart))
    gpsend = int(ceil(min(globalv.NOW, gpsend)))
    if gpsend <= gpsstart:
        return Cache()

    # open datafind connection (created once, so that the LLO special
    # case below can reuse it)
    if cert is not None:
        dfconn = datafind.GWDataFindHTTPSConnection(
            host=host, port=port, cert_file=cert, key_file=key)
    else:
        dfconn = datafind.GWDataFindHTTPConnection(host=host, port=port)

    def _query():
        return dfconn.find_frame_urls(ifo[0].upper(), frametype, gpsstart,
                                      gpsend, urltype=urltype,
                                      on_gaps=gaps)

    try:
        cache = _query()
    except RuntimeError as e:
        sleep(1)
        try:
            cache = _query()
        except RuntimeError:
            if 'Invalid GPS times' in str(e):
                e.args = ('%s: %d ... %s' % (str(e), gpsstart, gpsend),)
            if onerror in ['ignore', None]:
                pass
            elif onerror in ['warn']:
                warnings.warn('Caught %s: %s' % (type(e).__name__, str(e)))
            else:
                raise
            cache = Cache()

    # XXX: if querying for day of LLO frame type change, do both
    if (ifo[0].upper() == 'L' and frametype in ['C', 'R', 'M', 'T'] and
            gpsstart < LLOCHANGE < gpsend):
        start = len(cache) and cache[-1].segment[1] or gpsstart
        if start < gpsend:
            cache.extend(dfconn.find_frame_urls(
                ifo[0].upper(), 'L1_%s' % frametype, start, gpsend,
                urltype=urltype, on_gaps=gaps)[1:])

    cache, _ = cache.checkfilesexist()
    vprint(' %d found.\n' % len(cache))
    return cache
def find_dmt_cache(start, end, ifo, check_files=False, **kwargs):
    """Find DMTOmega files for the given GPS period.

    @param start
        GPS start time for search
    @param end
        GPS end time for search
    @param ifo
        observatory for search
    @param check_files
        check that the returned files can be read on disk, default False
    @param kwargs UNDOCUMENTED
    """
    out = Cache()

    # verify host
    host = {'G1': 'atlas', 'H1': 'ligo-wa', 'H2': 'ligo-wa',
            'L1': 'ligo-la'}
    if (not kwargs.has_key('directory') and
            not re.search(host[ifo], getfqdn())):
        sys.stderr.write("WARNING: Omega online files are not available "
                         "for IFO=%s on this host." % ifo)
        sys.stderr.flush()
        return out

    span = segments.segment(start, end)

    # set known epochs
    known_epochs = {1031340854: 55, 1041657635: 55, 1041669472: 55,
                    1041682187: 55, 1044093810: 38, 1044111232: 38,
                    1044111282: 38, 1044112180: 38, 1057700030: 38,
                    1057722672: 38}

    # get parameters
    epoch = kwargs.pop("epoch", sorted(known_epochs.keys()))
    dt = kwargs.pop("duration", 55)
    try:
        iter(epoch)
    except TypeError:
        epoch = [epoch]
    overlap = kwargs.pop("overlap", 0)
    directory = kwargs.pop(
        "directory",
        "/gds-%s/dmt/triggers/%s-Omega_Triggers"
        % (ifo.lower(), ifo[0].upper()))

    # optimise
    append = out.append
    splitext = os.path.splitext
    isfile = os.path.isfile
    intersects = span.intersects
    segment = segments.segment
    from_T050017 = CacheEntry.from_T050017

    # get times
    epoch_idx = bisect.bisect_right(epoch, start) - 1
    try:
        dt = known_epochs[epoch[epoch_idx]]
    except KeyError:
        dt = 38
    next_epoch = len(epoch) >= epoch_idx + 2 and epoch[epoch_idx + 1] or 0
    start_time = int(start - numpy.mod(start - epoch[epoch_idx],
                                       dt - overlap))
    t = start_time

    def _omega_file(gps, ifo, deltaT):
        return ("%s/%s-OMEGA_TRIGGERS_CLUSTER-%.5s/"
                "%s-OMEGA_TRIGGERS_CLUSTER-%.10d-%d.xml"
                % (directory, ifo.upper(), gps, ifo.upper(), gps, deltaT))

    # loop over time segments constructing file paths
    while t < end:
        fp = _omega_file(t, ifo, dt)
        if (intersects(segment(t, t + dt)) and
                (not check_files or isfile(fp))):
            append(from_T050017(fp))
        t += dt - overlap
        if next_epoch and t > next_epoch:
            try:
                dt = known_epochs[next_epoch]
            except KeyError:
                dt = 55
            t = next_epoch
            epoch_idx += 1
            next_epoch = (len(epoch) >= epoch_idx + 2 and
                          epoch[epoch_idx + 1] or 0)
    out.sort(key=lambda e: e.path)
    return out
def process(self, config=GWSummConfigParser(), **kwargs):
    # set params
    self.rounds = None

    if not os.path.isdir(self.directory):
        self.rounds = None
        return

    # get some basic info
    ifo = config.get('DEFAULT', 'ifo')

    # read the configuration
    d = os.path.realpath(self.directory).rstrip('/')
    self.conf = dict()
    confs = glob(os.path.join(d, '%s-HVETO_CONF-*-*.txt' % ifo))
    if len(confs) != 1:
        self.rounds = 'FAIL'
        return
    conffile = confs[0]
    try:
        with open(conffile) as f:
            self.conf = dict()
            lines = f.readlines()[3:]
            for line in lines:
                try:
                    key, val = line.split(': ', 1)
                    self.conf[key.strip()] = eval(val)
                except (ValueError, SyntaxError, NameError):
                    pass
    except IOError:
        self.rounds = 'FAIL'
        return
    else:
        etg = self.conf.pop('AUXtype', None)
        if 'DEfnm' in self.conf:
            name = re_quote.sub('', self.conf['DEfnm'])
            self.primary = '%s:%s' % (ifo, name)
            if 'DEtype' in self.conf:
                hetg = re_quote.sub('', self.conf['DEtype'])
                if re.search('_%s\Z' % hetg, self.primary, re.I):
                    self.primary = self.primary[:-len(hetg) - 1]
        else:
            self.primary = None

    # find the segments
    try:
        ce = CacheEntry.from_T050017(conffile)
    except ValueError:
        start = int(self.span[0])
        duration = int(abs(self.span))
        span = self.span
    else:
        start = int(ce.segment[0])
        duration = int(abs(ce.segment))
        span = ce.segment
    try:
        statefile = self.conf['dqfnm']
    except KeyError:
        statefile = '%s-HVETO_DQ_SEGS-%d-%d.txt' % (ifo, start, duration)
    if not os.path.isfile(os.path.join(self.directory, statefile)):
        self.rounds = 'NOSEGMENTS'
        return

    # find the results table
    resultsfile = os.path.join(self.directory, 'summary_stats.txt')
    if not os.path.isfile(resultsfile):
        self.rounds = 'FAIL'
        return

    # determine the Hveto state
    cache = Cache([CacheEntry.from_T050017(
        os.path.join(self.directory, statefile))])
    segments = SegmentList.read(cache)
    globalv.SEGMENTS[self.states[0].definition] = DataQualityFlag(
        self.states[0].definition, known=[span], active=segments)
    self.finalize_states(config=config, query=False)

    # read results file
    self.rounds = []
    with open(resultsfile, 'r') as f:
        for line in f.readlines():
            self.rounds.append(dict(zip(self.summaryrows,
                                        line.split(' ')[1:])))
            # fix channel name
            c = '%s:%s' % (ifo, self.rounds[-1]['Winning channel'])
            if etg and re.search('_%s\Z' % etg, c, re.I):
                c = c.rsplit('_', 1)[0]
            self.rounds[-1]['Winning channel'] = c

    # read starting triggers
    rawfile = ('%s-HVETO_RAW_TRIGS_ROUND_0-%d-%d.txt'
               % (ifo, start, duration))
    cache = Cache([CacheEntry.from_T050017(
        os.path.join(self.directory, rawfile))])
    get_triggers('%s:hveto_start' % ifo, 'hveto', [self.span],
                 config=config, cache=cache, tablename='sngl_burst',
                 return_=False)

    get_triggers('%s:hveto_vetoed_all' % ifo, 'hveto', [self.span],
                 config=config, cache=Cache(), tablename='sngl_burst')
    for r in range(1, len(self.rounds) + 1):
        # read round veto triggers
        rawfile = ('%s-HVETO_VETOED_TRIGS_ROUND_%d-%d-%d.txt'
                   % (ifo, r, start, duration))
        cache = Cache([CacheEntry.from_T050017(
            os.path.join(self.directory, rawfile))])
        trigs = get_triggers('%s:hveto_vetoed_round %d' % (ifo, r),
                             'hveto', [self.span], config=config,
                             cache=cache, tablename='sngl_burst')
        globalv.TRIGGERS['%s:hveto_vetoed_all,hveto' % ifo].extend(trigs)
        # read round veto segments
        segfile = ('%s-HVETO_VETO_SEGS_ROUND_%d-%d-%d.txt'
                   % (ifo, r, start, duration))
        cache = Cache([CacheEntry.from_T050017(
            os.path.join(self.directory, segfile))])
        get_segments('%s:hveto_veto_segs_round_%d' % (ifo, r),
                     [self.span], config=config, cache=cache,
                     return_=False)

    for plot in self.plots:
        if isinstance(plot, HvetoSegmentSummaryPlot):
            plot.find_flags()

    kwargs['trigcache'] = Cache()
    kwargs['segmentcache'] = Cache()
    super(HvetoTab, self).process(config=config, **kwargs)

    # find some plots
    for plot in ['OVERAL_HISTOGRAM', 'OVERAL_EFF_DT'][::-1]:
        filename = ('%s-HVETO_%s-%d-%d.png'
                    % (ifo, plot, start, duration))
        plotfile = os.path.join(self.directory, filename)
        if os.path.isfile(plotfile):
            p = SummaryPlot(os.path.join(self.url, filename), new=False)
            p.state = self.states[0]
            self.plots.insert(0, p)

    # delete data from archive
    del globalv.SEGMENTS[self.states[0].definition]
    for row in range(1, len(self.rounds) + 1):
        del globalv.SEGMENTS['%s:hveto_veto_segs_round_%s' % (ifo, row)]
def process(self, nds=None, nproc=1, config=GWSummConfigParser(),
            datacache=None, segmentcache=Cache(), datafind_error='raise',
            **kwargs):
    """Process data for the given state.
    """
    # finalize state information
    self.finalize_states(
        config=config, segdb_error=kwargs.get('segdb_error', 'raise'),
        datafind_error=datafind_error)
    vprint("States finalised [%d total]\n" % len(self.states))
    vprint("    Default state: %r\n" % str(self.defaultstate))

    # remove plots that have already been generated
    for p in self.plots:
        if p.outputfile in globalv.WRITTEN_PLOTS:
            p.new = False

    # ------------------------------------------------------------------
    # work out which channels are needed

    prefix = '%s:GRD-%s_%%s' % (self.ifo, self.node)
    state = sorted(self.states, key=lambda s: abs(s.active))[-1]

    prefices = ['STATE_N', 'REQUEST_N', 'NOMINAL_N', 'OK', 'MODE', 'OP']
    alldata = list(get_timeseries_dict(
        [prefix % x for x in prefices], state, config=config, nds=nds,
        nproc=nproc, cache=datacache, datafind_error=datafind_error,
        dtype='int32').values())
    vprint("    All time-series data loaded\n")

    # ------------------------------------------------------------------
    # find segments and transitions

    self.transitions = dict((v, []) for v in self.grdstates)

    for sdata, rdata, ndata, okdata in zip(*alldata[:4]):
        ssegs = DataQualityDict()
        rsegs = DataQualityDict()
        nsegs = DataQualityDict()
        oksegs = (okdata == 1).to_dqflag(name='Node OK')
        for v, name in self.grdstates.items():
            # get segments for state
            tag = self.segmenttag % name
            instate = sdata == v
            ssegs[tag] = instate.to_dqflag(name=name)
            diff_ = numpy.diff(instate.value.astype(int))
            transin = (diff_ == 1).nonzero()[0] + 1
            transout = (diff_ == -1).nonzero()[0] + 1
            for i, j in zip(transin, transout):
                t = sdata.times[i].value
                from_ = sdata[i - 1].value
                to_ = sdata[j].value
                self.transitions[v].append((t, from_, to_))
            # get segments for request
            tag = self.segmenttag % name + REQUESTSTUB
            instate = rdata == v
            rsegs[tag] = instate.to_dqflag(name=name)
            # get segments for nominal
            tag = self.segmenttag % name + NOMINALSTUB
            nom = ndata == v
            nsegs[tag] = nom.to_dqflag(name=name)

        globalv.SEGMENTS += ssegs
        globalv.SEGMENTS += rsegs
        globalv.SEGMENTS += nsegs
        globalv.SEGMENTS += {self.segmenttag % 'OK': oksegs}

    super(GuardianTab, self).process(
        config=config, nds=nds, nproc=nproc, datacache=datacache,
        segmentcache=segmentcache, **kwargs)