def associate_psds_to_segments(opt, fd_segments, gwstrain, flen, delta_f,
                               flow, dyn_range_factor=1., precision=None):
    """Generate a set of overlapping PSDs covering the data in gwstrain.
    Then associate these PSDs with the appropriate segment in fd_segments.

    Parameters
    ----------
    opt : object
        Result of parsing the CLI with OptionParser, or any object with
        the required attributes (psd_model, psd_file, asd_file,
        psd_estimation, psd_segment_length, psd_segment_stride,
        psd_inverse_length, psd_output).
    fd_segments : StrainSegments.fourier_segments() object
        The Fourier transforms of the various analysis segments. The psd
        attribute of each segment is updated to point to the appropriate
        PSD.
    gwstrain : Strain object
        The timeseries of raw data on which to estimate PSDs.
    flen : int
        The length in samples of the output PSDs.
    delta_f : float
        The frequency step of the output PSDs.
    flow : float
        The low frequency cutoff to use when calculating the PSD.
    dyn_range_factor : {1, float}
        For PSDs taken from models or text files, if `dyn_range_factor`
        is not None, then the PSD is multiplied by
        `dyn_range_factor` ** 2.
    precision : str, choices (None, 'single', 'double')
        If not specified, or specified as None, the precision of the
        returned PSD will match the precision of the data, if measuring a
        PSD, or will match the default precision of the model if using an
        analytical PSD. If 'single' the PSD will be converted to float32,
        if not already in that precision. If 'double' the PSD will be
        converted to float64, if not already in that precision.
    """
    psds_and_times = generate_overlapping_psds(
        opt, gwstrain, flen, delta_f, flow,
        dyn_range_factor=dyn_range_factor, precision=precision)

    for fd_segment in fd_segments:
        best_psd = None
        psd_overlap = 0
        inp_seg = segments.segment(fd_segment.seg_slice.start,
                                   fd_segment.seg_slice.stop)
        # Associate each analysis segment with the PSD that overlaps it most.
        for start_idx, end_idx, psd in psds_and_times:
            psd_seg = segments.segment(start_idx, end_idx)
            if psd_seg.intersects(inp_seg):
                curr_overlap = abs(inp_seg & psd_seg)
                if curr_overlap > psd_overlap:
                    psd_overlap = curr_overlap
                    best_psd = psd
        if best_psd is None:
            raise ValueError("No PSDs found intersecting segment!")
        fd_segment.psd = best_psd
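# A minimal sketch of the overlap-maximising selection above, using plain
# pycbc_glue.segments objects; the string labels stand in for real PSD
# arrays and the index ranges are invented for illustration:
#
# >>> from pycbc_glue import segments
# >>> psds_and_times = [(0, 1024, "psd_A"), (512, 1536, "psd_B")]
# >>> inp_seg = segments.segment(800, 1300)
# >>> best_psd, psd_overlap = None, 0
# >>> for start_idx, end_idx, psd in psds_and_times:
# ...     psd_seg = segments.segment(start_idx, end_idx)
# ...     if psd_seg.intersects(inp_seg):
# ...         curr_overlap = abs(inp_seg & psd_seg)
# ...         if curr_overlap > psd_overlap:
# ...             psd_overlap, best_psd = curr_overlap, psd
# >>> best_psd   # covers 500 samples of inp_seg, versus 224 for "psd_A"
# 'psd_B'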
def build_segment_list_one(engine, gps_start_time, gps_end_time, ifo,
                           segment_name, version=None,
                           start_pad=0, end_pad=0):
    """Builds a list of segments satisfying the given criteria"""
    seg_result = segmentlist([])
    sum_result = segmentlist([])

    # Is there any way to get segment and segment summary in one query?
    # Maybe some sort of outer join where we keep track of which segment
    # summaries we've already seen.
    sql = "SELECT segment_summary.start_time, segment_summary.end_time "
    sql += "FROM segment_definer, segment_summary "
    sql += "WHERE segment_summary.segment_def_id = segment_definer.segment_def_id "
    sql += "AND segment_definer.ifos = '%s' " % ifo
    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += "AND segment_summary.segment_def_cdb = segment_definer.creator_db "
    sql += "AND segment_definer.name = '%s' " % segment_name
    sql += "AND segment_definer.version = %s " % version
    sql += "AND NOT (%s > segment_summary.end_time OR segment_summary.start_time > %s)" % (gps_start_time, gps_end_time)

    rows = engine.query(sql)

    for sum_start_time, sum_end_time in rows:
        # Clip each summary interval to the requested time range.
        sum_start_time = (sum_start_time < gps_start_time) and gps_start_time or sum_start_time
        sum_end_time = (sum_end_time > gps_end_time) and gps_end_time or sum_end_time
        sum_result |= segmentlist([segment(sum_start_time, sum_end_time)])

    # We can't use queries parameterized with ? since the ldbd protocol
    # doesn't support it...
    sql = "SELECT segment.start_time + %d, segment.end_time + %d " % (start_pad, end_pad)
    sql += "FROM segment, segment_definer "
    sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id "
    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += "AND segment.segment_def_cdb = segment_definer.creator_db "
    sql += "AND segment_definer.ifos = '%s' " % ifo
    sql += "AND segment_definer.name = '%s' " % segment_name
    sql += "AND segment_definer.version = %s " % version
    sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % (gps_start_time, gps_end_time)

    rows = engine.query(sql)

    for seg_start_time, seg_end_time in rows:
        # Clip each segment to the requested time range.
        seg_start_time = (seg_start_time < gps_start_time) and gps_start_time or seg_start_time
        seg_end_time = (seg_end_time > gps_end_time) and gps_end_time or seg_end_time
        seg_result |= segmentlist([segment(seg_start_time, seg_end_time)])

    engine.close()

    return sum_result, seg_result
def from_T050017(cls, url, coltype=LIGOTimeGPS):
    """
    Parse a URL in the style of T050017-00 into a CacheEntry. The
    T050017-00 file name format is, essentially,

    observatory-description-start-duration.extension

    Example:

    >>> c = CacheEntry.from_T050017("file://localhost/data/node144/frames/S5/strain-L2/LLO/L-L1_RDS_C03_L2-8365/L-L1_RDS_C03_L2-836562330-83.gwf")
    >>> c.observatory
    'L'
    >>> c.host
    'localhost'
    >>> os.path.basename(c.path)
    'L-L1_RDS_C03_L2-836562330-83.gwf'
    """
    match = cls._url_regex.search(url)
    if not match:
        raise ValueError("could not convert %s to CacheEntry" % repr(url))
    observatory = match.group("obs")
    description = match.group("dsc")
    start = match.group("strt")
    duration = match.group("dur")
    if start == "-" and duration == "-":
        # no segment information
        segment = None
    else:
        segment = segments.segment(coltype(start),
                                   coltype(start) + coltype(duration))
    return cls(observatory, description, segment, url)
def expand_version_number(engine, segdef):
    ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

    if version != '*':
        return [segdef]

    # Start looking at the full interval
    intervals = segmentlist([segment(start_time, end_time)])

    # Find the maximum version number
    sql = "SELECT max(version) FROM segment_definer "
    sql += "WHERE segment_definer.ifos = '%s' " % ifo
    sql += "AND segment_definer.name = '%s' " % name

    rows = engine.query(sql)
    try:
        version = len(rows[0]) and rows[0][0] or 1
    except:
        version = None

    results = []

    while version > 0:
        for interval in intervals:
            segs = query_segments(engine, 'segment_summary',
                                  [(ifo, name, version,
                                    interval[0], interval[1], 0, 0)])
            for seg in segs[0]:
                results.append((ifo, name, version, seg[0], seg[1], 0, 0))
        intervals.coalesce()
        intervals -= segs[0]
        version -= 1

    return results
def find_segments(doc, key, use_segment_table=True):
    key_pieces = key.split(':')
    while len(key_pieces) < 3:
        key_pieces.append('*')

    filter_func = lambda x: str(x.ifos) == key_pieces[0] and \
        (str(x.name) == key_pieces[1] or key_pieces[1] == '*') and \
        (str(x.version) == key_pieces[2] or key_pieces[2] == '*')

    # Find all segment definers matching the criteria
    seg_def_table = table.get_table(doc, lsctables.SegmentDefTable.tableName)
    seg_defs = filter(filter_func, seg_def_table)
    seg_def_ids = map(lambda x: str(x.segment_def_id), seg_defs)

    # Find all segments belonging to those definers
    if use_segment_table:
        seg_table = table.get_table(doc, lsctables.SegmentTable.tableName)
        seg_entries = filter(lambda x: str(x.segment_def_id) in seg_def_ids,
                             seg_table)
    else:
        seg_sum_table = table.get_table(doc,
                                        lsctables.SegmentSumTable.tableName)
        seg_entries = filter(lambda x: str(x.segment_def_id) in seg_def_ids,
                             seg_sum_table)

    # Combine into a segmentlist
    ret = segmentlist(map(lambda x: segment(x.start_time, x.end_time),
                          seg_entries))
    ret.coalesce()

    return ret
def from_bitstream(bitstream, start, dt, minlen=1):
    """
    Convert consecutive True values in a bit stream (boolean-castable
    iterable) to a stream of segments. Require minlen consecutive True
    samples to comprise a segment.

    Example:

    >>> list(from_bitstream((True, True, False, True, False), 0, 1))
    [segment(0, 2), segment(3, 4)]
    >>> list(from_bitstream([[], [[]], [[]], [], []], 1013968613, 0.125))
    [segment(1013968613.125, 1013968613.375)]
    """
    bitstream = iter(bitstream)
    i = 0
    while 1:
        if bitstream.next():
            # found start of True block; find the end
            j = i + 1
            try:
                while bitstream.next():
                    j += 1
            finally:  # make sure StopIteration doesn't kill final segment
                if j - i >= minlen:
                    yield segments.segment(start + i * dt, start + j * dt)
            i = j  # advance to end of block
        i += 1
def fromtama(file, coltype=lal.LIGOTimeGPS):
    """
    Read a segmentlist from the file object file containing TAMA
    locked-segments data. Parsing stops on the first line that cannot be
    parsed (which is consumed). The segmentlist will be created with
    segments whose boundaries are of type coltype, which should raise
    ValueError if it cannot convert its string argument.

    NOTE: TAMA locked-segments files contain non-integer start and end
    times, so the default column type is set to LIGOTimeGPS.

    NOTE: the output is a segmentlist as described by the file; if the
    segments in the input file are not coalesced or out of order, then
    thusly shall be the output of this function. It is recommended that
    this function's output be coalesced before use.
    """
    # The hyphen is placed first in the character class so that it is
    # matched literally rather than forming a range.
    segmentpat = re.compile(
        r"\A\s*\S+\s+\S+\s+\S+\s+([-\d.+eE]+)\s+([-\d.+eE]+)")
    l = segments.segmentlist()
    for line in file:
        try:
            [tokens] = segmentpat.findall(line)
            l.append(segments.segment(map(coltype, tokens[0:2])))
        except ValueError:
            break
    return l
def segmentlist_range(start, stop, period):
    """
    Analogous to Python's range() builtin, this generator yields a
    sequence of continuous adjacent segments each of length "period" with
    the first starting at "start" and the last ending not after "stop".
    Note that the segments generated do not form a coalesced list (they
    are not disjoint). start, stop, and period can be any objects which
    support basic arithmetic operations.

    Example:

    >>> from pycbc_glue.segments import *
    >>> segmentlist(segmentlist_range(0, 15, 5))
    [segment(0, 5), segment(5, 10), segment(10, 15)]
    >>> segmentlist(segmentlist_range('', 'xxx', 'x'))
    [segment('', 'x'), segment('x', 'xx'), segment('xx', 'xxx')]
    """
    n = 1
    b = start
    while True:
        a, b = b, start + n * period
        if b > stop:
            break
        yield segments.segment(a, b)
        n += 1
def fromfilenames(filenames, coltype=int):
    """
    Return a segmentlist describing the intervals spanned by the files
    whose names are given in the list filenames. The segmentlist is
    constructed by parsing the file names, and the boundaries of each
    segment are coerced to type coltype.

    The file names are parsed using a generalization of the format
    described in Technical Note LIGO-T010150-00-E, which allows the
    start time and duration appearing in the file name to be
    non-integers.

    NOTE: the output is a segmentlist as described by the file names; if
    the file names are not in time order, or describe overlapping
    segments, then thusly shall be the output of this function. It is
    recommended that this function's output be coalesced before use.
    """
    pattern = re.compile(r"-([\d.]+)-([\d.]+)\.[\w_+#]+\Z")
    l = segments.segmentlist()
    for name in filenames:
        [(s, d)] = pattern.findall(name.strip().rstrip(".gz"))
        s = coltype(s)
        d = coltype(d)
        l.append(segments.segment(s, s + d))
    return l
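# Doctest-style usage sketch for fromfilenames; the frame file names are
# invented, but follow the LIGO-T010150 IFO-DESC-START-DURATION.ext pattern:
#
# >>> fromfilenames(["H-R-815901601-576.gwf"])
# [segment(815901601, 815902177)]
# >>> fromfilenames(["H-R-815901601.5-0.25.gwf"], coltype=float)
# [segment(815901601.5, 815901601.75)]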
def legacy_get_valid_times(self):
    """
    Return the length of data that the tmpltbank job will need to read
    and the part of that data that the template bank is valid for. In the
    case of lalapps_tmpltbank the following options are needed to set
    this up and will be used by the Executable to figure this out:

    * --pad-data (seconds, amount of data used to pad the analysis
      region. This is needed as some data will be corrupted from the
      data conditioning process)

    * --segment-length (sample points, length of each analysis segment)

    * --sample-rate (Hz, number of sample points per second. The data
      will be resampled to this value if necessary)

    * --number-of-segments (Number of analysis segments, note that
      overlapping segments are used for PSD estimation, so every data
      point will appear in two segments, except the first
      segment-length/4 and last segment-length/4 points.)

    Returns
    -------
    dataLength : float (seconds)
        The length of data that the job will need
    validChunk : glue.glue.segments.segment
        The start and end of the dataLength that is valid for the
        template bank.
    """
    # Read in needed options. This will fail if options not present.
    # It will search relevant sub-sections for the option, so this can be
    # set differently for each ifo.
    padData = int(self.get_opt('pad-data'))
    segmentLength = float(self.get_opt('segment-length'))
    sampleRate = float(self.get_opt('sample-rate'))
    numSegments = int(self.get_opt('number-of-segments'))

    # Calculate total valid duration
    analysisDur = int(segmentLength / sampleRate) * (numSegments + 1) / 2
    if (segmentLength % sampleRate):
        errString = "In tmpltbank, when running lalapps_tmpltbank "
        errString += "segment-length must be a multiple of sample-rate."
        raise ValueError(errString)

    # Set the segments
    dataLength = analysisDur + 2 * padData
    validStart = padData
    validEnd = analysisDur + padData
    # If this is inspiral we lose segment-length/4 on start and end
    if self.name == 'inspiral':
        # Don't think inspiral will do well if segmentLength/4 is not
        # an integer
        validStart = validStart + int(segmentLength / (sampleRate * 4))
        validEnd = validEnd - int(segmentLength / (sampleRate * 4))
    validChunk = segments.segment([validStart, validEnd])

    return [dataLength], [validChunk]
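# Worked example of the arithmetic above (option values invented for
# illustration): with --pad-data 8, --segment-length 1048576,
# --sample-rate 4096 and --number-of-segments 15, each segment lasts
# 1048576 / 4096 = 256 s, so analysisDur = 256 * (15 + 1) / 2 = 2048 s and
# dataLength = 2048 + 2 * 8 = 2064 s. The valid chunk is then [8, 2056),
# or [72, 1992) for an 'inspiral' job, which trims a further
# segment-length / 4 = 64 s from each end.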
def from_range_strings(ranges, boundtype=int):
    """
    Parse a list of ranges expressed as strings in the form "value" or
    "first:last" into an equivalent pycbc_glue.segments.segmentlist. In
    the latter case, an empty string for "first" and/or "last" indicates
    a (semi)infinite range. A typical use for this function is in parsing
    command line options or entries in configuration files.

    NOTE: the output is a segmentlist as described by the strings; if the
    segments in the input file are not coalesced or out of order, then
    thusly shall be the output of this function. It is recommended that
    this function's output be coalesced before use.

    Example:

    >>> text = "0:10,35,100:"
    >>> from_range_strings(text.split(","))
    [segment(0, 10), segment(35, 35), segment(100, infinity)]
    """
    # preallocate segmentlist
    segs = segments.segmentlist([None] * len(ranges))

    # iterate over strings
    for i, range in enumerate(ranges):
        parts = range.split(":")
        if len(parts) == 1:
            parts = boundtype(parts[0])
            segs[i] = segments.segment(parts, parts)
            continue
        if len(parts) != 2:
            raise ValueError(range)
        if parts[0] == "":
            parts[0] = segments.NegInfinity
        else:
            parts[0] = boundtype(parts[0])
        if parts[1] == "":
            parts[1] = segments.PosInfinity
        else:
            parts[1] = boundtype(parts[1])
        segs[i] = segments.segment(parts[0], parts[1])

    # success
    return segs
def create_node(self, coinc_files, tags=None):
    if tags is None:
        tags = []
    segs = coinc_files.get_times_covered_by_files()
    seg = segments.segment(segs[0][0], segs[-1][1])

    node = Node(self)
    node.set_memory(5000)
    node.add_input_list_opt('--coinc-files', coinc_files)
    node.new_output_file_opt(seg, '.hdf', '--output-file', tags=tags)
    return node
def get_segment_summary_times(scienceFile, segmentName):
    """
    This function will find the times for which the segment_summary is
    set for the flag given by segmentName.

    Parameters
    ----------
    scienceFile : SegFile
        The segment file that we want to use to determine this.
    segmentName : string
        The DQ flag to search for times in the segment_summary table.

    Returns
    -------
    summSegList : glue.segments.segmentlist
        The times that are covered in the segment summary table.
    """
    # Parse the segmentName
    segmentName = segmentName.split(':')
    if not len(segmentName) in [2, 3]:
        raise ValueError("Invalid channel name %s." % (segmentName))
    ifo = segmentName[0]
    channel = segmentName[1]
    version = ''
    if len(segmentName) == 3:
        version = int(segmentName[2])

    # Load the filename
    xmldoc = utils.load_filename(
        scienceFile.cache_entry.path,
        gz=scienceFile.cache_entry.path.endswith("gz"),
        contenthandler=ContentHandler)

    # Get the segment_def_id for the segmentName
    segmentDefTable = table.get_table(xmldoc, "segment_definer")
    for entry in segmentDefTable:
        if (entry.ifos == ifo) and (entry.name == channel):
            if len(segmentName) == 2 or (entry.version == version):
                segDefID = entry.segment_def_id
                break
    else:
        raise ValueError("Cannot find channel %s in segment_definer table."
                         % (segmentName))

    # Get the segmentlist corresponding to this segmentName in
    # segment_summary
    segmentSummTable = table.get_table(xmldoc, "segment_summary")
    summSegList = segments.segmentlist([])
    for entry in segmentSummTable:
        if entry.segment_def_id == segDefID:
            segment = segments.segment(entry.start_time, entry.end_time)
            summSegList.append(segment)
    summSegList.coalesce()

    return summSegList
def create_node(self, zerolag, full_data, injfull, fullinj, tags=None):
    if tags is None:
        tags = []
    segs = zerolag.get_times_covered_by_files()
    seg = segments.segment(segs[0][0], segs[-1][1])

    node = Node(self)
    node.set_memory(5000)
    node.add_input_list_opt('--zero-lag-coincs', zerolag)
    node.add_input_list_opt('--full-data-background', full_data)
    node.add_input_list_opt('--mixed-coincs-inj-full', injfull)
    node.add_input_list_opt('--mixed-coincs-full-inj', fullinj)
    node.new_output_file_opt(seg, '.hdf', '--output-file', tags=tags)
    return node
def get_valid_times(self):
    pad_data = int(self.get_opt('pad-data'))
    if self.has_opt('analyse-segment-end'):
        safety = 1
        deadtime = int(self.get_opt('segment-duration')) / 2
        spec_len = int(self.get_opt('inverse-spec-length')) / 2
        valid_start = (self.data_seg[0] + deadtime - spec_len + pad_data
                       - safety)
        valid_end = self.data_seg[1] - spec_len - pad_data - safety
    else:
        overlap = int(self.get_opt('segment-duration')) / 4
        valid_start = self.data_seg[0] + overlap + pad_data
        valid_end = self.data_seg[1] - overlap - pad_data
    return self.data_seg, segments.segment(valid_start, valid_end)
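# For concreteness (numbers invented): without --analyse-segment-end, with
# --pad-data 8 and --segment-duration 256, overlap = 256 / 4 = 64, so a
# data_seg of (0, 2048) yields a valid segment of (72, 1976).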
def S2playground(extent):
    """
    Return a segmentlist identifying the S2 playground times within the
    interval defined by the segment extent.

    Example:

    >>> from pycbc_glue import segments
    >>> S2playground(segments.segment(874000000, 874010000))
    [segment(874000013, 874000613), segment(874006383, 874006983)]
    """
    lo = int(extent[0])
    lo -= (lo - 729273613) % 6370
    hi = int(extent[1]) + 1
    return segments.segmentlist(
        segments.segment(t, t + 600)
        for t in range(lo, hi, 6370)) & segments.segmentlist([extent])
def create_node(self, trig_files, bank_file, stat_files, veto_file,
                veto_name, template_str, tags=None):
    if tags is None:
        tags = []
    segs = trig_files.get_times_covered_by_files()
    seg = segments.segment(segs[0][0], segs[-1][1])

    node = Node(self)
    node.set_memory(10000)
    node.add_input_opt('--template-bank', bank_file)
    node.add_input_list_opt('--trigger-files', trig_files)
    if len(stat_files) > 0:
        node.add_input_list_opt('--statistic-files', stat_files)
    if veto_file is not None:
        node.add_input_opt('--veto-files', veto_file)
        node.add_opt('--segment-name', veto_name)
    node.add_opt('--template-fraction-range', template_str)
    node.new_output_file_opt(seg, '.hdf', '--output-file', tags=tags)
    return node
def columns_from_file_list(file_list, columns, ifo, start, end):
    """
    Return columns of information stored in single detector trigger
    files.

    Parameters
    ----------
    file_list : FileList
        List of single detector trigger files.
    columns : list of strings
        The list of columns to read from the trigger files.
    ifo : string
        The ifo to return triggers for.
    start : int
        The start time to get triggers from
    end : int
        The end time to get triggers from

    Returns
    -------
    trigger_dict : dict
        A dictionary of column vectors with column names as keys.
    """
    file_list = file_list.find_output_with_ifo(ifo)
    file_list = file_list.find_all_output_in_range(ifo, segment(start, end))

    trig_dict = {}
    for trig_file in file_list:
        f = h5py.File(trig_file.storage_path, 'r')

        time = f['end_time'][:]
        pick = numpy.logical_and(time < end, time > start)
        pick_loc = numpy.where(pick)[0]

        for col in columns:
            if col not in trig_dict:
                trig_dict[col] = []
            trig_dict[col] = numpy.concatenate(
                [trig_dict[col], f[col][:][pick_loc]])

    return trig_dict
def __init__(self, cp, name, universe=None, ifo=None, injection_file=None,
             gate_files=None, out_dir=None, tags=None):
    if tags is None:
        tags = []
    super(LegacyCohPTFInspiralExecutable, self).__init__(
        cp, name, universe, ifo, out_dir=out_dir, tags=tags)
    self.injection_file = injection_file
    self.data_seg = segments.segment(
        int(cp.get('workflow', 'start-time')),
        int(cp.get('workflow', 'end-time')))
    self.num_threads = 1
def __init__(self, *args, **kwargs):
    """
    Initialize a CacheEntry object. The arguments can take two forms:
    a single string argument, which is interpreted and parsed as a line
    from a LAL cache file, or four arguments used to explicitly
    initialize the observatory, description, segment and URL in that
    order. When parsing a single line of text from a LAL cache, an
    optional key-word argument "coltype" can be provided to set the type
    the start and durations are parsed as. The default is
    pycbc_glue.lal.LIGOTimeGPS.

    Example:

    >>> c = CacheEntry("H1", "S5", segments.segment(815901601, 815902177.5), "file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> print c.segment
    [815901601 ... 815902177.5)
    >>> print str(c)
    H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml
    >>> c = CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
    >>> print c.segment
    [815901601 ... 815902177.5)
    >>> print CacheEntry("H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml", coltype = float).segment
    [815901601.0 ... 815902177.5)

    See also the .from_T050017() class method for an alternative
    initialization mechanism.
    """
    if len(args) == 1:
        # parse line of text as an entry in a cache file
        match = self._regex.search(args[0])
        try:
            match = match.groupdict()
        except AttributeError:
            raise ValueError("could not convert %s to CacheEntry"
                             % repr(args[0]))
        self.observatory = match["obs"]
        self.description = match["dsc"]
        start = match["strt"]
        duration = match["dur"]
        coltype = kwargs.pop("coltype", LIGOTimeGPS)
        if start == "-" and duration == "-":
            # no segment information
            self.segment = None
        else:
            start = coltype(start)
            self.segment = segments.segment(start,
                                            start + coltype(duration))
        self.url = match["url"]
        if kwargs:
            raise TypeError("unrecognized keyword arguments: %s"
                            % ", ".join(kwargs))
    elif len(args) == 4:
        # parse arguments as observatory, description, segment, url
        if kwargs:
            raise TypeError("invalid arguments: %s" % ", ".join(kwargs))
        self.observatory, self.description, self.segment, self.url = args
    else:
        raise TypeError("invalid arguments: %s" % args)

    # "-" indicates an empty column
    if self.observatory == "-":
        self.observatory = None
    if self.description == "-":
        self.description = None
def start_end_to_segments(start, end):
    return segmentlist([segment(s, e) for s, e in zip(start, end)])
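# Doctest-style sketch of the helper above; it pairs the i-th start with
# the i-th end, and does not coalesce the result:
#
# >>> from pycbc_glue.segments import segment, segmentlist
# >>> start_end_to_segments([10, 30], [20, 40])
# [segment(10, 20), segment(30, 40)]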
def run_datafind_instance(cp, outputDir, connection, observatory, frameType,
                          startTime, endTime, ifo, tags=None):
    """
    This function will query the datafind server once to find frames
    between the specified times for the specified frame type and
    observatory.

    Parameters
    ----------
    cp : ConfigParser instance
        Source for any kwargs that should be sent to the datafind module
    outputDir : string
        Output cache files will be written here. We also write the
        commands for reproducing what is done in this function to this
        directory.
    connection : datafind connection object
        Initialized through the glue.datafind module, this is the open
        connection to the datafind server.
    observatory : string
        The observatory to query frames for. Ex. 'H', 'L' or 'V'. NB: not
        'H1', 'L1', 'V1' which denote interferometers.
    frameType : string
        The frame type to query for.
    startTime : int
        Integer start time to query the datafind server for frames.
    endTime : int
        Integer end time to query the datafind server for frames.
    ifo : string
        The interferometer to use for naming output. Ex. 'H1', 'L1',
        'V1'. Maybe this could be merged with the observatory string, but
        this could cause issues if running on old 'H2' and 'H1' data.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is
        being called more than once to give call specific configuration
        (by setting options in [workflow-datafind-${TAG}] rather than
        [workflow-datafind]). This is also used to tag the Files returned
        by the class to make the Files and the actual filenames unique.
        FIXME: Filenames may not be unique with current codes!

    Returns
    -------
    dfCache : glue.lal.Cache instance
        The glue.lal.Cache representation of the call to the datafind
        server and the returned frame files.
    cacheFile : pycbc.workflow.core.File
        Cache file listing all of the datafind output files for use later
        in the pipeline.
    """
    if tags is None:
        tags = []

    seg = segments.segment([startTime, endTime])
    # Take the datafind kwargs from config (usually urltype=file is given).
    dfKwargs = {}
    # By default ignore missing frames, this case is dealt with outside
    # of here
    dfKwargs['on_gaps'] = 'ignore'
    if cp.has_section("datafind"):
        for item, value in cp.items("datafind"):
            dfKwargs[item] = value
    for tag in tags:
        if cp.has_section('datafind-%s' % (tag)):
            for item, value in cp.items("datafind-%s" % (tag)):
                dfKwargs[item] = value

    # It is useful to print the corresponding command to the logs
    # directory to check if this was expected.
    log_datafind_command(observatory, frameType, startTime, endTime,
                         os.path.join(outputDir, 'logs'), **dfKwargs)
    logging.debug("Asking datafind server for frames.")
    dfCache = connection.find_frame_urls(observatory, frameType,
                                         startTime, endTime, **dfKwargs)
    logging.debug("Frames returned")
    # workflow format output file
    cache_file = File(ifo, 'DATAFIND', seg, extension='lcf',
                      directory=outputDir, tags=tags)
    cache_file.PFN(cache_file.cache_entry.path, site='local')

    dfCache.ifo = ifo
    # Dump output to file
    fP = open(cache_file.storage_path, "w")
    # FIXME: CANNOT use dfCache.tofile because it will print 815901601.00000
    # as a gps time which is incompatible with the lal cache format
    # (and the C codes) which demand an integer.
    #dfCache.tofile(fP)
    for entry in dfCache:
        start = str(int(entry.segment[0]))
        duration = str(int(abs(entry.segment)))
        print("%s %s %s %s %s"
              % (entry.observatory, entry.description, start, duration,
                 entry.url), file=fP)
        entry.segment = segments.segment(int(entry.segment[0]),
                                         int(entry.segment[1]))

    fP.close()
    return dfCache, cache_file
def make_grb_segments_plot(wkflow, science_segs, trigger_time, trigger_name,
                           out_dir, coherent_seg=None, fail_criterion=None):

    ifos = wkflow.ifos
    if len(science_segs.keys()) == 0:
        extent = segments.segment(
            int(wkflow.cp.get("workflow", "start-time")),
            int(wkflow.cp.get("workflow", "end-time")))
    else:
        pltpad = [science_segs.extent_all()[1] - trigger_time,
                  trigger_time - science_segs.extent_all()[0]]
        extent = segments.segmentlist([
            science_segs.extent_all(),
            segments.segment(trigger_time - pltpad[0],
                             trigger_time + pltpad[1])]).extent()

    ifo_colors = {}
    for ifo in ifos:
        ifo_colors[ifo] = ifo_color(ifo)
        if ifo not in science_segs.keys():
            science_segs[ifo] = segments.segmentlist([])

    # Make plot
    fig, subs = plt.subplots(len(ifos), sharey=True)
    plt.xticks(rotation=20, ha='right')
    for sub, ifo in zip(subs, ifos):
        for seg in science_segs[ifo]:
            sub.add_patch(Rectangle((seg[0], 0.1), abs(seg), 0.8,
                                    facecolor=ifo_colors[ifo],
                                    edgecolor='none'))
        if coherent_seg:
            if len(science_segs[ifo]) > 0 and \
                    coherent_seg in science_segs[ifo]:
                sub.plot([trigger_time, trigger_time], [0, 1], '-',
                         c='orange')
                sub.add_patch(Rectangle((coherent_seg[0], 0),
                                        abs(coherent_seg), 1, alpha=0.5,
                                        facecolor='orange',
                                        edgecolor='none'))
            else:
                sub.plot([trigger_time, trigger_time], [0, 1], ':',
                         c='orange')
                sub.plot([coherent_seg[0], coherent_seg[0]], [0, 1], '--',
                         c='orange', alpha=0.5)
                sub.plot([coherent_seg[1], coherent_seg[1]], [0, 1], '--',
                         c='orange', alpha=0.5)
        else:
            sub.plot([trigger_time, trigger_time], [0, 1], ':k')
        if fail_criterion:
            if len(science_segs[ifo]) > 0:
                style_str = '--'
            else:
                style_str = '-'
            sub.plot([fail_criterion[0], fail_criterion[0]], [0, 1],
                     style_str, c='black', alpha=0.5)
            sub.plot([fail_criterion[1], fail_criterion[1]], [0, 1],
                     style_str, c='black', alpha=0.5)
        sub.set_frame_on(False)
        sub.set_yticks([])
        sub.set_ylabel(ifo, rotation=45)
        sub.set_ylim([0, 1])
        sub.set_xlim([float(extent[0]), float(extent[1])])
        sub.get_xaxis().get_major_formatter().set_useOffset(False)
        sub.get_xaxis().get_major_formatter().set_scientific(False)
        sub.get_xaxis().tick_bottom()
        if sub is subs[-1]:
            sub.tick_params(labelsize=10, pad=1)
        else:
            sub.get_xaxis().set_ticks([])
            sub.get_xaxis().set_ticklabels([])

    xmin, xmax = fig.axes[-1].get_xaxis().get_view_interval()
    ymin, _ = fig.axes[-1].get_yaxis().get_view_interval()
    fig.axes[-1].add_artist(Line2D((xmin, xmax), (ymin, ymin),
                                   color='black', linewidth=2))
    fig.axes[-1].set_xlabel('GPS Time')

    fig.axes[0].set_title('Science Segments for GRB%s' % trigger_name)
    plt.tight_layout()
    fig.subplots_adjust(hspace=0)

    plot_name = 'GRB%s_segments.png' % trigger_name
    plot_url = 'file://localhost%s/%s' % (out_dir, plot_name)
    fig.savefig('%s/%s' % (out_dir, plot_name))

    return [ifos, plot_name, extent, plot_url]
def get_coh_PTF_files(cp, ifos, run_dir, bank_veto=False,
                      summary_files=False):
    """
    Retrieve files needed to run coh_PTF jobs within a PyGRB workflow

    Parameters
    ----------
    cp : pycbc.workflow.configuration.WorkflowConfigParser object
        The parsed configuration options of a pycbc.workflow.core.Workflow.
    ifos : str
        String containing the analysis interferometer IDs.
    run_dir : str
        The run directory, destination for retrieved files.
    bank_veto : Boolean
        If true, will retrieve the bank_veto_bank.xml file.
    summary_files : Boolean
        If true, will retrieve the summary page style files.

    Returns
    -------
    file_list : pycbc.workflow.FileList object
        A FileList containing the retrieved files.
    """
    if os.getenv("LAL_SRC") is None:
        raise ValueError("The environment variable LAL_SRC must be set to a "
                         "location containing the file lalsuite.git")
    else:
        lalDir = os.getenv("LAL_SRC")

    sci_seg = segments.segment(int(cp.get("workflow", "start-time")),
                               int(cp.get("workflow", "end-time")))
    file_list = FileList([])

    # Bank veto
    if bank_veto:
        shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/"
                    "bank_veto_bank.xml" % lalDir, "%s" % run_dir)
        bank_veto_url = "file://localhost%s/bank_veto_bank.xml" % run_dir
        bank_veto = File(ifos, "bank_veto_bank", sci_seg,
                         file_url=bank_veto_url)
        bank_veto.PFN(bank_veto.cache_entry.path, site="local")
        file_list.extend(FileList([bank_veto]))

    if summary_files:
        # summary.js file
        shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/"
                    "coh_PTF_html_summary.js" % lalDir, "%s" % run_dir)
        summary_js_url = "file://localhost%s/coh_PTF_html_summary.js" \
                         % run_dir
        summary_js = File(ifos, "coh_PTF_html_summary_js", sci_seg,
                          file_url=summary_js_url)
        summary_js.PFN(summary_js.cache_entry.path, site="local")
        file_list.extend(FileList([summary_js]))

        # summary.css file
        shutil.copy("%s/lalapps/src/ring/coh_PTF_config_files/"
                    "coh_PTF_html_summary.css" % lalDir, "%s" % run_dir)
        summary_css_url = "file://localhost%s/coh_PTF_html_summary.css" \
                          % run_dir
        summary_css = File(ifos, "coh_PTF_html_summary_css", sci_seg,
                           file_url=summary_css_url)
        summary_css.PFN(summary_css.cache_entry.path, site="local")
        file_list.extend(FileList([summary_css]))

    return file_list
def find_frame_urls(self, site, frametype, gpsstart, gpsend,
                    match=None, urltype=None, on_gaps="warn"):
    """Find the framefiles for the given type in the [start, end) interval
    frame

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param gpsstart: integer GPS start time of query
    @param gpsend: integer GPS end time of query
    @param match: regular expression to match against
    @param urltype: file scheme to search for (e.g. 'file')
    @param on_gaps: what to do when the requested frame isn't found,
        one of:
            - C{'warn'} (default): print a warning,
            - C{'error'}: raise an L{RuntimeError}, or
            - C{'ignore'}: do nothing

    @type site: L{str}
    @type frametype: L{str}
    @type gpsstart: L{int}
    @type gpsend: L{int}
    @type match: L{str}
    @type urltype: L{str}
    @type on_gaps: L{str}

    @returns: L{Cache<pycbc_glue.lal.Cache>}

    @raises RuntimeError: if gaps are found and C{on_gaps='error'}
    """
    if on_gaps not in ("warn", "error", "ignore"):
        raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")
    url = ("%s/gwf/%s/%s/%s,%s"
           % (_url_prefix, site, frametype, gpsstart, gpsend))
    # if a URL type is specified append it to the path
    if urltype:
        url += "/%s" % urltype
    # request JSON output
    url += ".json"
    # append a regex if input
    if match:
        url += "?match=%s" % match
    # make query
    response = self._requestresponse("GET", url)
    urllist = decode(response.read())
    out = lal.Cache([lal.CacheEntry.from_T050017(
        x, coltype=self.LIGOTimeGPSType) for x in urllist])
    if on_gaps == "ignore":
        return out
    else:
        span = segments.segment(gpsstart, gpsend)
        seglist = segments.segmentlist(e.segment for e in out).coalesce()
        missing = (segments.segmentlist([span]) - seglist).coalesce()
        if span in seglist:
            return out
        else:
            msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
            if on_gaps == "warn":
                sys.stderr.write("%s\n" % msg)
                return out
            else:
                raise RuntimeError(msg)
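# The gap handling above reduces to segment arithmetic: the requested span
# minus the coalesced union of the returned file segments. A self-contained
# sketch of that check, with invented times:
#
# >>> from pycbc_glue import segments
# >>> span = segments.segment(100, 500)
# >>> seglist = segments.segmentlist([segments.segment(100, 250),
# ...                                 segments.segment(300, 500)]).coalesce()
# >>> (segments.segmentlist([span]) - seglist).coalesce()
# [segment(250, 300)]
# >>> span in seglist   # False, so on_gaps='error' would raise
# False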
def vote(seglists, n):
    """
    Given a sequence of segmentlists, returns the intervals during which
    at least n of them intersect. The input segmentlists must be
    coalesced, the output is coalesced.

    Example:

    >>> from pycbc_glue.segments import *
    >>> w = segmentlist([segment(0, 15)])
    >>> x = segmentlist([segment(5, 20)])
    >>> y = segmentlist([segment(10, 25)])
    >>> z = segmentlist([segment(15, 30)])
    >>> vote((w, x, y, z), 3)
    [segment(10, 20)]

    The sequence of segmentlists is only iterated over once, and the
    segmentlists within it are only iterated over once; they can all be
    generators. If there are a total of N segments in M segment lists and
    the final result has L segments the algorithm is O(N M) + O(L).
    """
    # check for no-op
    if n < 1:
        return segments.segmentlist()

    # digest the segmentlists into an ordered sequence of off-on and
    # on-off transitions with the vote count for each transition
    # FIXME: this generator is declared locally for now, is it useful
    # as a stand-alone generator?
    def pop_min(l):
        # remove and return the smallest value from a list
        val = min(l)
        for i in xrange(len(l) - 1, -1, -1):
            if l[i] is val:
                return l.pop(i)
        assert False  # cannot get here

    def vote_generator(seglists):
        queue = []
        for seglist in seglists:
            segiter = iter(seglist)
            try:
                seg = segiter.next()
            except StopIteration:
                continue
            # put them in so that the smallest boundary is
            # closest to the end of the list
            queue.append((seg[1], -1, segiter))
            queue.append((seg[0], +1, None))
        if not queue:
            return
        queue.sort(reverse=True)
        bound = queue[-1][0]
        votes = 0
        while queue:
            this_bound, delta, segiter = pop_min(queue)
            if this_bound == bound:
                votes += delta
            else:
                yield bound, votes
                bound = this_bound
                votes = delta
            if segiter is not None:
                try:
                    seg = segiter.next()
                except StopIteration:
                    continue
                queue.append((seg[1], -1, segiter))
                queue.append((seg[0], +1, None))
        yield bound, votes

    # compute the cumulative sum of votes, and assemble a segmentlist
    # from the intervals when the vote count is equal to or greater
    # than n
    result = segments.segmentlist()
    votes = 0
    for bound, delta in vote_generator(seglists):
        if delta > 0 and n - delta <= votes < n:
            start = bound
        elif delta < 0 and n <= votes < n - delta:
            result.append(segments.segment(start, bound))
            del start  # detect stops that aren't preceded by starts
        votes += delta
    assert votes == 0  # detect failed cumulative sum

    return result
def coalesce_seg(database, start_time, end_time):
    ret = 0  # assume execution successful

    try:
        st = int(start_time)
        et = int(end_time)
        db = str(database.strip())

        #-------------------------------------------------------------------
        # Set up environment and get needed values
        #-------------------------------------------------------------------
        # Set up connection to the database
        dbconn = DB2.connect(dsn=db, uid='', pwd='', autoCommit=True)
        curs = dbconn.cursor()

        # create a new process_id
        sql = "select hex(GENERATE_UNIQUE()) from sysibm.sysdummy1"
        curs.execute(sql)
        hex_procid = curs.fetchone()[0]
        process_id = 'x' + '\'' + hex_procid + '\''

        # determine the local creator_db
        sql = "SELECT DEFAULT FROM SYSCAT.COLUMNS WHERE "
        sql += "TABNAME = 'PROCESS' AND COLNAME = 'CREATOR_DB'"
        curs.execute(sql)
        creator_db = int(curs.fetchone()[0])

        # prepare values for the new row to be inserted into the process
        # table
        program = os.path.abspath(sys.argv[0])
        node = socket.gethostname()
        username = pwd.getpwuid(os.getuid()).pw_name
        unix_procid = os.getpid()
        proc_start_time = gpstime.GpsSecondsFromPyUTC(time.time())
        end_time = None
        jobid = 0
        domain = 'coalesce_local'

        # insert new row into process table
        sql = "INSERT INTO process "
        sql += "(program, is_online, node, username, unix_procid, start_time, jobid, domain, process_id, creator_db) "
        sql += "VALUES ('%s', 0, '%s', '%s', %d, %d, %d, '%s',%s, %d)" % (program, node, username, unix_procid, proc_start_time, jobid, domain, process_id, creator_db)
        curs.execute(sql)

        # get the BLOB process_id for later reference
        sql = "SELECT BLOB(process_id) from process where hex(process_id)='%s' " % hex_procid
        curs.execute(sql)
        blob_procid = curs.fetchone()[0]

        #====================================================================
        #
        # Main
        #
        #====================================================================
        # Algorithm:
        # 1. Find distinct segment types in the segment_summary table
        #    within the start_time, end_time range
        # 2. Find segments and intervals to coalesce
        # 3. Coalesce segments and intervals
        # 4. Insert coalesced segments back in to the database
        # 5. Delete uncoalesced segments and intervals from the database

        # 1. Find distinct segment types matching our criteria from
        #    segment_summary within the specified time range
        sql = "SELECT distinct(hex(segment_summary.segment_def_id)) FROM segment_summary, segment_definer, process "
        sql += "WHERE segment_summary.segment_def_id=segment_definer.segment_def_id "
        sql += "AND segment_summary.segment_def_cdb=segment_definer.creator_db "
        sql += "AND segment_summary.process_id=process.process_id "
        sql += "AND segment_summary.creator_db=process.creator_db "
        # Removed next line so that all segments are coalesced: this will
        # be slower up front but faster for queries and the long run
        #sql += "AND ((segment_definer.name like 'DMT-%' and segment_definer.version=1) or (process.ifos='V1' and process.program='SegOnline')) "
        sql += "AND segment_summary.start_time <=%d " % et
        sql += "AND segment_summary.end_time >= %d " % st
        curs.execute(sql)
        def_ids = curs.fetchall()
        if not def_ids:
            data_existence = 0
        else:
            data_existence = 1

        # loop over the segment types to fetch, coalesce, insert and delete
        for d in def_ids:
            # get the BLOB segment_def_id for later use
            sql = "SELECT BLOB(segment_def_id), ifos, name, version, creator_db "
            sql += "FROM segment_definer "
            sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
            curs.execute(sql)
            result = curs.fetchone()
            blob_defid = result[0]
            ifos = result[1].strip()
            name = result[2]
            ver = result[3]
            def_cdb = result[4]

            # 2. Find segments and intervals to coalesce
            # get the segment start_time, end_time to coalesce, and the
            # corresponding primary key to delete
            try:
                curs.execute("drop view seg_view")
            except:
                pass
            sql = "CREATE view seg_view (st,et,seg_id) AS "
            sql += "SELECT start_time,end_time, segment_id from segment "
            sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
            sql += "AND segment.start_time <=%d " % et
            sql += "AND segment.end_time >= %d " % st
            print >> sys.stdout, ("Selecting segments to coalesce for %s version:%d %s ... " % (ifos, ver, name))
            curs.execute(sql)
            curs.execute("SELECT st,et from seg_view")
            seg_bf_cos = curs.fetchall()  # get the segments to coalesce

            # get the summary start_time, end_time to coalesce, and the
            # corresponding primary key to delete
            try:
                curs.execute("drop view sum_view")
            except:
                pass
            sql = "CREATE view sum_view (st,et,sum_id) AS "
            sql += "SELECT start_time,end_time, segment_sum_id from segment_summary "
            sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
            sql += "AND segment_summary.start_time <=%d " % et
            sql += "AND segment_summary.end_time >= %d " % st
            curs.execute(sql)
            curs.execute("SELECT st,et from sum_view")
            sum_bf_cos = curs.fetchall()  # get the summaries to coalesce

            # 3. Coalesce segments and intervals
            print >> sys.stdout, "Coalescing segments ... "
            segs = segments.segmentlist([])
            sums = segments.segmentlist([])
            for bf in seg_bf_cos:
                seg = segments.segment(int(bf[0]), int(bf[1]))
                segs.append(seg)
            for bf in sum_bf_cos:
                sum = segments.segment(int(bf[0]), int(bf[1]))
                sums.append(sum)

            segs.coalesce()
            sums.coalesce()

            # 4. Insert coalesced segments back in to the database
            # insert coalesced segs into segment table
            insert_list = []
            for s in segs:
                # generate unique id for insertion
                curs.execute("VALUES BLOB(GENERATE_UNIQUE())")
                prim_id = curs.fetchone()[0]
                # generate a list of values to insert using executemany()
                insert_list.append((prim_id, creator_db, s[0], s[1],
                                    blob_defid, def_cdb, blob_procid))
            sql = "INSERT INTO segment "
            sql += "(segment_id, creator_db, start_time, end_time, segment_def_id, segment_def_cdb, process_id) "
            sql += "VALUES (?,?,?,?,?,?,?) "
            print >> sys.stdout, "Inserting coalesced segments back in ... "
            curs.executemany(sql, insert_list)

            # insert coalesced sums into segment_summary table
            insert_list = []
            for s in sums:
                # generate unique id for insertion
                curs.execute("VALUES BLOB(GENERATE_UNIQUE())")
                prim_id = curs.fetchone()[0]
                # generate a list of values to insert using executemany()
                insert_list.append((prim_id, creator_db, s[0], s[1],
                                    blob_defid, def_cdb, blob_procid))
            sql = "INSERT INTO segment_summary "
            sql += "(segment_sum_id, creator_db, start_time, end_time, segment_def_id, segment_def_cdb, process_id) "
            sql += "VALUES (?,?,?,?,?,?,?) "
            curs.executemany(sql, insert_list)

            # 5. Delete uncoalesced segments and intervals from the
            #    database
            print >> sys.stdout, "Deleting un-coalesced segments ... "
            print >> sys.stdout
            sql = "DELETE FROM segment "
            sql += "WHERE segment_id in (select seg_id from seg_view) "
            sql += "AND process_id != %s " % process_id
            curs.execute(sql)

            sql = "DELETE FROM segment_summary "
            sql += "WHERE segment_sum_id in (select sum_id from sum_view) "
            sql += "AND process_id != %s " % process_id
            curs.execute(sql)

        # update end_time in process table
        sql = "update process set end_time=%d where hex(process_id)='%s' " % (gpstime.GpsSecondsFromPyUTC(time.time()), hex_procid)
        curs.execute(sql)

        try:
            curs.execute("drop view seg_view")
            curs.execute("drop view sum_view")
        except:
            pass

        curs.close()
    except Exception, e:
        ret = str(e)
        print >> sys.stdout, ("%s" % ret)
def query_segments(engine, table, segdefs):
    # Each segdef is a list containing:
    #     ifo, name, version, start_time, end_time, start_pad, end_pad

    # The trivial case: if there's nothing to do, return no time
    if len(segdefs) == 0:
        return [segmentlist([])]

    #
    # For the sake of efficiency we query the database for all the segdefs
    # at once.  This constructs the clause that matches one segdef.
    #
    def make_clause(table, segdef):
        ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

        sql = " (segment_definer.ifos = '%s' " % ifo
        sql += "AND segment_definer.name = '%s' " % name
        sql += "AND segment_definer.version = %s " % version
        sql += "AND NOT (%d > %s.end_time OR %s.start_time > %d)) " % (start_time, table, table, end_time)

        return sql

    clauses = [make_clause(table, segdef) for segdef in segdefs]

    sql = 'SELECT segment_definer.ifos, segment_definer.name, segment_definer.version, '
    sql += ' %s.start_time, %s.end_time ' % (table, table)
    sql += ' FROM segment_definer, %s ' % table
    sql += ' WHERE %s.segment_def_id = segment_definer.segment_def_id AND ' % table

    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += " %s.segment_def_cdb = segment_definer.creator_db AND " % table

    sql += '( ' + ' OR '.join(clauses) + ' )'

    rows = engine.query(sql)

    #
    # The result of the query will be rows of the form
    #     ifo, name, version, start_time, end_time
    #
    # We want to associate each returned row with the segdef it belongs to so that
    # we can apply the correct padding.
    #
    # If segdefs were uniquely specified by (ifo, name, version) this would
    # be easy, but it may happen that we're looking for the same segment definer
    # at multiple disjoint times.  In particular this can happen if the user
    # didn't specify a version number; in that case we might have version 2
    # of some flag defined over multiple disjoint segment_definers.
    #
    results = []

    for segdef in segdefs:
        ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

        search_span = segment(start_time, end_time)
        search_span_list = segmentlist([search_span])

        # See whether a row belongs to the current segdef.  Name, ifo and version
        # must match, and the padded segment must overlap the range of the segdef.
        # (The end time takes end_pad here, consistent with pad_and_truncate below.)
        def matches(row):
            return (row[0].strip() == ifo and row[1] == name
                    and int(row[2]) == int(version)
                    and search_span.intersects(segment(row[3] + start_pad, row[4] + end_pad)))

        # Add the padding.  Segments may extend beyond the time of interest;
        # chop off the excess.
        def pad_and_truncate(row_start, row_end):
            tmp = segmentlist([segment(row_start + start_pad, row_end + end_pad)])
            # No coalesce needed, as a list with a single segment is already coalesced
            tmp &= search_span_list

            # The intersection is guaranteed to be non-empty if the row passed matches()
            # PR 2969: The above comment is incorrect.  Negative padding may cause
            # an empty intersection.
            if len(tmp) == 0:
                return segment(0, 0)
            else:
                return tmp[0]

        # Build a segment list from the returned segments, padded and truncated.
        # The segments will not necessarily be disjoint, if the padding crosses
        # gaps.  They are also not guaranteed to be in order, since there's no
        # ORDER BY in the query.  So the list needs to be coalesced before
        # arithmetic can be done with it.
        result = segmentlist([pad_and_truncate(row[3], row[4]) for row in rows if matches(row)]).coalesce()

        # This is not needed: since each of the segments is constrained to lie
        # within the search span, the whole list must be as well.
        # result &= search_span_list

        results.append(result)

    return results
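# Illustrative sketch (not part of the original module): a minimal call to
# query_segments() for a single segdef.  The flag name, version and GPS times
# are invented for illustration; ``engine`` is any open query engine of the
# kind used above.
def _example_query_one_flag(engine):
    # segdef layout: ifo, name, version, start_time, end_time, start_pad, end_pad
    segdefs = [["H1", "DMT-SCIENCE", 1, 968544000, 968630400, 0, 0]]
    # query_segments returns one coalesced segmentlist per segdef
    return query_segments(engine, "segment", segdefs)[0]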
def find_frame_urls(self, site, frametype, gpsstart, gpsend,
                    match=None, urltype=None, on_gaps="warn"):
    """Find the frame files for the given type in the [start, end) interval

    @param site: single-character name of site to match
    @param frametype: name of frametype to match
    @param gpsstart: integer GPS start time of query
    @param gpsend: integer GPS end time of query
    @param match: regular expression to match against
    @param urltype: file scheme to search for (e.g. 'file')
    @param on_gaps: what to do when the requested frame isn't found,
        one of:
            - C{'warn'} (default): print a warning,
            - C{'error'}: raise an L{RuntimeError}, or
            - C{'ignore'}: do nothing

    @type site: L{str}
    @type frametype: L{str}
    @type gpsstart: L{int}
    @type gpsend: L{int}
    @type match: L{str}
    @type urltype: L{str}
    @type on_gaps: L{str}

    @returns: L{Cache<pycbc_glue.lal.Cache>}

    @raises RuntimeError: if gaps are found and C{on_gaps='error'}
    """
    if on_gaps not in ("warn", "error", "ignore"):
        raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")

    url = ("%s/gwf/%s/%s/%s,%s" % (_url_prefix, site, frametype,
                                   gpsstart, gpsend))

    # if a URL type is specified, append it to the path
    if urltype:
        url += "/%s" % urltype

    # request JSON output
    url += ".json"

    # append a regex if one was given
    if match:
        url += "?match=%s" % match

    # make the query
    response = self._requestresponse("GET", url)
    urllist = decode(response.read())

    out = lal.Cache([lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
                     for x in urllist])

    if on_gaps == "ignore":
        return out

    # check coverage: subtract the span of the returned cache from the
    # requested span
    span = segments.segment(gpsstart, gpsend)
    seglist = segments.segmentlist(e.segment for e in out).coalesce()
    missing = (segments.segmentlist([span]) - seglist).coalesce()
    if span in seglist:
        return out
    msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
    if on_gaps == "warn":
        sys.stderr.write("%s\n" % msg)
        return out
    raise RuntimeError(msg)
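# Illustrative sketch (not part of the original class): the gap check that
# find_frame_urls() performs, isolated.  Assumes the same ``segments`` module
# imported above; the helper name is invented.
def _missing_coverage_example(span, cache_spans):
    """Return the parts of ``span`` not covered by ``cache_spans``.

    >>> _missing_coverage_example(segments.segment(0, 100),
    ...                           [segments.segment(0, 40),
    ...                            segments.segment(60, 100)])
    [segment(40, 60)]
    """
    covered = segments.segmentlist(cache_spans).coalesce()
    return (segments.segmentlist([span]) - covered).coalesce()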