Example #1
File: psd.py  Project: cmbiwer/pycbc
def setup_psd_calculate(workflow, frame_files, ifo, segments,
                        segment_name, out_dir, tags=None):
    make_analysis_dir(out_dir)
    tags = [] if not tags else tags
    if workflow.cp.has_option_tags('workflow-psd', 'parallelization-factor', tags=tags):
        num_parts = int(workflow.cp.get_opt_tags('workflow-psd', 
                                                 'parallelization-factor',
                                                 tags=tags))
    else:
        num_parts = 1
        
    # get rid of duplicate segments which happen when splitting the bank
    segments = segmentlist(frozenset(segments))
        
    segment_lists = list(chunks(segments, num_parts)) 
    
    psd_files = FileList([])
    for i, segs in enumerate(segment_lists):
        seg_file = SegFile.from_segment_list('%s_%s' %(segment_name, i),
                         segmentlist(segs), segment_name, ifo,
                         valid_segment=workflow.analysis_time,
                         extension='xml', directory=out_dir)

        psd_files += [make_psd_file(workflow, frame_files, seg_file,
                                    segment_name, out_dir, 
                                    tags=tags + ['PART%s' % i])]
    
    if num_parts > 1:
        return merge_psds(workflow, psd_files, ifo, out_dir, tags=tags)
    else:
        return psd_files[0]
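
A note on the pattern above: segmentlist(frozenset(segments)) drops duplicate segments (at the cost of ordering) before the list is split across parallel PSD jobs. A minimal self-contained sketch of the same dedup-and-split step, assuming the ligo-segments package (the maintained successor of pycbc_glue.segments) and a hypothetical chunks() helper like the one this function relies on:

from ligo.segments import segment, segmentlist

def chunks(items, num_parts):
    # Hypothetical helper: split items into num_parts roughly equal pieces.
    size = max(1, -(-len(items) // num_parts))  # ceiling division
    return (items[i:i + size] for i in range(0, len(items), size))

segs = segmentlist([segment(0, 10), segment(0, 10), segment(20, 30)])
segs = segmentlist(frozenset(segs))   # remove duplicates; ordering is lost
print(list(chunks(segs, 2)))          # one sublist of segments per PSD job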
Example #2
def build_segment_list_one(engine, gps_start_time, gps_end_time, ifo, segment_name, version = None, start_pad = 0, end_pad = 0):
    """Builds a list of segments satisfying the given criteria """
    seg_result = segmentlist([])
    sum_result = segmentlist([])

    # Is there any way to get segment and segment summary in one query?
    # Maybe some sort of outer join where we keep track of which segment
    # summaries we've already seen.
    sql = "SELECT segment_summary.start_time, segment_summary.end_time "
    sql += "FROM segment_definer, segment_summary "
    sql += "WHERE segment_summary.segment_def_id = segment_definer.segment_def_id "
    sql += "AND   segment_definer.ifos = '%s' " % ifo
    if engine.__class__ == query_engine.LdbdQueryEngine:
       sql += "AND segment_summary.segment_def_cdb = segment_definer.creator_db "
    sql += "AND   segment_definer.name = '%s' " % segment_name
    sql += "AND   segment_definer.version = %s " % version
    sql += "AND NOT (%s > segment_summary.end_time OR segment_summary.start_time > %s)" % (gps_start_time, gps_end_time)

    rows = engine.query(sql)

    for sum_start_time, sum_end_time in rows:
        sum_start_time = max(sum_start_time, gps_start_time)
        sum_end_time = min(sum_end_time, gps_end_time)

        sum_result |= segmentlist([segment(sum_start_time, sum_end_time)])

    # We can't use queries parameterized with ? since the ldbd protocol doesn't support it...
    sql = "SELECT segment.start_time + %d, segment.end_time + %d " % (start_pad, end_pad)
    sql += "FROM segment, segment_definer "
    sql += "WHERE segment.segment_def_id = segment_definer.segment_def_id "

    if engine.__class__ == query_engine.LdbdQueryEngine:
       sql += "AND segment.segment_def_cdb = segment_definer.creator_db "
    sql += "AND   segment_definer.ifos = '%s' " % ifo
    sql += "AND   segment_definer.name = '%s' " % segment_name
    sql += "AND   segment_definer.version = %s " % version
    sql += "AND NOT (%s > segment.end_time OR segment.start_time > %s)" % (gps_start_time, gps_end_time)

    rows = engine.query(sql)
    
    for seg_start_time, seg_end_time in rows:
        seg_start_time = max(seg_start_time, gps_start_time)
        seg_end_time = min(seg_end_time, gps_end_time)

        seg_result |= segmentlist([segment(seg_start_time, seg_end_time)])

    engine.close()

    return sum_result, seg_result
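
The max()/min() clamping above can be done equivalently with one segmentlist intersection against the query window. A self-contained sketch, assuming the ligo-segments API:

from ligo.segments import segment, segmentlist

window = segmentlist([segment(1000000000, 1000000100)])
rows = [(999999990, 1000000050), (1000000080, 1000000200)]
clamped = segmentlist(segment(s, e) for s, e in rows) & window
print(clamped)
# [segment(1000000000, 1000000050), segment(1000000080, 1000000100)]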
Example #3
def get_science_segs_from_datafind_outs(datafindcaches):
    """
    This function will calculate the science segments that are covered in
    the OutGroupList containing the frame files returned by various
    calls to the datafind server. This can then be used to check whether this
    list covers what it is expected to cover.

    Parameters
    ----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    --------
    newScienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        The times covered by the frames found in datafindOuts.
    """
    newScienceSegs = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            groupSegs = segments.segmentlist(e.segment for e in cache).coalesce()
            ifo = cache.ifo
            if ifo not in newScienceSegs:
                newScienceSegs[ifo] = groupSegs
            else:
                newScienceSegs[ifo].extend(groupSegs)
                newScienceSegs[ifo].coalesce()
    return newScienceSegs
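
The extend()-then-coalesce() pair used above is the manual form of the in-place union operator, which keeps each per-ifo list coalesced as it grows. A quick sketch with the ligo-segments API:

from ligo.segments import segment, segmentlist

a = segmentlist([segment(0, 10)])
b = segmentlist([segment(5, 20)])
a |= b        # equivalent to a.extend(b); a.coalesce()
print(a)      # [segment(0, 20)]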
Example #4
def fromtama(file, coltype=lal.LIGOTimeGPS):
    """
	Read a segmentlist from the file object file containing TAMA
	locked-segments data.  Parsing stops on the first line that cannot
	be parsed (which is consumed).  The segmentlist will be created
	with segments whose boundaries are of type coltype, which should
	raise ValueError if it cannot convert its string argument.

	NOTE:  TAMA locked-segments files contain non-integer start and end
	times, so the default column type is set to LIGOTimeGPS.

	NOTE:  the output is a segmentlist as described by the file;  if
	the segments in the input file are not coalesced or out of order,
	then thusly shall be the output of this function.  It is
	recommended that this function's output be coalesced before use.
	"""
    segmentpat = re.compile(
        r"\A\s*\S+\s+\S+\s+\S+\s+([\d.+-eE]+)\s+([\d.+-eE]+)")
    l = segments.segmentlist()
    for line in file:
        try:
            [tokens] = segmentpat.findall(line)
            l.append(segments.segment(map(coltype, tokens[0:2])))
        except ValueError:
            break
    return l
Example #5
	def segmentlistdict(self):
		"""
		A segmentlistdict object describing the instruments and
		time spanned by this CacheEntry.  A new object is
		constructed each time this attribute is accessed (segments
		are immutable so there is no reason to try to share a
		reference to the CacheEntry's internal segment;
		modifications of one would not be reflected in the other
		anyway).

		Example:

		>>> c = CacheEntry(u"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
		>>> c.segmentlistdict
		{u'H1': [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]}

		The "observatory" column of the cache entry, which is
		frequently used to store instrument names, is parsed into
		instrument names for the dictionary keys using the same
		rules as pycbc_glue.ligolw.lsctables.instrument_set_from_ifos().

		Example:

		>>> c = CacheEntry(u"H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
		>>> c.segmentlistdict
		{u'H1H2': [segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]}
		"""
		# the import has to be done here to break the cyclic
		# dependency
		from pycbc_glue.ligolw.lsctables import instrument_set_from_ifos
		instruments = instrument_set_from_ifos(self.observatory) or (None,)
		return segments.segmentlistdict((instrument, segments.segmentlist(self.segment is not None and [self.segment] or [])) for instrument in instruments)
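
For reference, the segmentlistdict returned above supports dictionary-wide set operations across instruments. A self-contained sketch of that behaviour (ligo-segments API assumed):

from ligo.segments import segment, segmentlist, segmentlistdict

d = segmentlistdict({
    "H1": segmentlist([segment(0, 100)]),
    "L1": segmentlist([segment(50, 150)]),
})
print(d.intersection(["H1", "L1"]))  # [segment(50, 100)]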
Example #6
def indices_within_segments(times, segment_files, ifo=None, segment_name=None):
    """ Return the list of indices that should be vetoed by the segments in the
    list of segment_files.
    
    Parameters
    ----------
    times: numpy.ndarray of integer type
        Array of gps start times
    segment_files: string or list of strings
        A string or list of strings that contain the path to xml files that
        contain a segment table
    ifo: string, optional
        The ifo to retrieve segments for from the segment files
    segment_name: str, optional
        name of segment       
    Returns
    -------
    indices: numpy.ndarray
        The array of index values within the segments
    segmentlist: 
        The segment list corresponding to the selected time.
    """
    veto_segs = segmentlist([])
    indices = numpy.array([], dtype=numpy.uint32)   
    for veto_file in segment_files:
        veto_segs += select_segments_by_definer(veto_file, segment_name, ifo)
    veto_segs.coalesce()  
    
    start, end = segments_to_start_end(veto_segs)
    if len(start) > 0:
        idx = indices_within_times(times, start, end)
        indices = numpy.union1d(indices, idx)

    return indices, veto_segs.coalesce()
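
indices_within_times() is a pycbc helper; the selection it performs can be sketched directly with numpy (a simple mask-based version, not pycbc's actual implementation):

import numpy

times = numpy.array([1, 5, 12, 18, 25])
start = numpy.array([0, 15])
end = numpy.array([6, 20])
mask = numpy.zeros(len(times), dtype=bool)
for s, e in zip(start, end):
    mask |= (times >= s) & (times < e)   # inside any [start, end) interval
print(numpy.flatnonzero(mask))           # [0 1 3]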
Example #7
	def __init__(self, active = (), valid = (), instruments = set(), name = None, version = None, comment = None):
		"""
		Initialize a new LigolwSegmentList instance.  active and
		valid are sequences that will be cast to
		segments.segmentlist objects.  They can be generator
		expressions.  The "active" sequence is what is usually
		thought of as the segment list, the "valid" sequence
		identifies the intervals of time for which the segment
		list's state is defined.
		"""
		self.valid = segments.segmentlist(valid)
		self.active = segments.segmentlist(active)
		self.instruments = instruments
		self.name = name
		self.version = version
		self.comment = comment
Example #8
def find_segments(doc, key, use_segment_table = True):
    key_pieces = key.split(':')
    while len(key_pieces) < 3:
        key_pieces.append('*')

    filter_func = lambda x: str(x.ifos) == key_pieces[0] and (str(x.name) == key_pieces[1] or key_pieces[1] == '*') and (str(x.version) == key_pieces[2] or key_pieces[2] == '*') 

    # Find all segment definers matching the criteria
    seg_def_table = table.get_table(doc, lsctables.SegmentDefTable.tableName)
    seg_defs      = [x for x in seg_def_table if filter_func(x)]
    seg_def_ids   = [str(x.segment_def_id) for x in seg_defs]

    # Find all segments belonging to those definers
    if use_segment_table:
        seg_table     = table.get_table(doc, lsctables.SegmentTable.tableName)
        seg_entries   = [x for x in seg_table if str(x.segment_def_id) in seg_def_ids]
    else:
        seg_sum_table = table.get_table(doc, lsctables.SegmentSumTable.tableName)
        seg_entries   = [x for x in seg_sum_table if str(x.segment_def_id) in seg_def_ids]

    # Combine into a segmentlist
    ret = segmentlist(segment(x.start_time, x.end_time) for x in seg_entries)

    ret.coalesce()

    return ret
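
The key argument follows an "ifo:name:version" grammar in which missing trailing pieces act as wildcards, for example:

key_pieces = "H1:DMT-SCIENCE".split(':')
while len(key_pieces) < 3:
    key_pieces.append('*')
print(key_pieces)   # ['H1', 'DMT-SCIENCE', '*'] -- any version matches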
Example #9
def fromfilenames(filenames, coltype=int):
    """
	Return a segmentlist describing the intervals spanned by the files
	whose names are given in the list filenames.  The segmentlist is
	constructed by parsing the file names, and the boundaries of each
	segment are coerced to type coltype.

	The file names are parsed using a generalization of the format
	described in Technical Note LIGO-T010150-00-E, which allows the
	start time and duration appearing in the file name to be
	non-integers.

	NOTE:  the output is a segmentlist as described by the file names;
	if the file names are not in time order, or describe overlapping
	segments, then thusly shall be the output of this function.  It is
	recommended that this function's output be coalesced before use.
	"""
    pattern = re.compile(r"-([\d.]+)-([\d.]+)\.[\w_+#]+\Z")
    l = segments.segmentlist()
    for name in filenames:
        name = name.strip()
        if name.endswith(".gz"):
            name = name[:-3]  # rstrip(".gz") would strip any trailing '.', 'g', 'z'
        [(s, d)] = pattern.findall(name)
        s = coltype(s)
        d = coltype(d)
        l.append(segments.segment(s, s + d))
    return l
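
The pattern above encodes the LIGO-T010150 convention, in which file names end in -START-DURATION.extension. A self-contained check of the same regular expression:

import re

pattern = re.compile(r"-([\d.]+)-([\d.]+)\.[\w_+#]+\Z")
print(pattern.findall("H-R-815901601-576.gwf"))  # [('815901601', '576')]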
Example #10
    def find_times(self, site, frametype, gpsstart=None, gpsend=None):
        """Query the LDR for times for which frames are available

        Use gpsstart and gpsend to restrict the returned times to
        this semiopen interval.

        @returns: L{segmentlist<pycbc_glue.segments.segmentlist>}

        @param site:
            single-character name of site to match
        @param frametype:
            name of frametype to match
        @param gpsstart:
            integer GPS start time of query
        @param gpsend:
            integer GPS end time of query

        @type       site: L{str}
        @type  frametype: L{str}
        @type   gpsstart: L{int}
        @type     gpsend: L{int}
        """
        if gpsstart and gpsend:
            url = ("%s/gwf/%s/%s/segments/%s,%s.json" %
                   (_url_prefix, site, frametype, gpsstart, gpsend))
        else:
            url = ("%s/gwf/%s/%s/segments.json" %
                   (_url_prefix, site, frametype))

        response = self._requestresponse("GET", url)
        span_list = decode(response.read())  # avoid shadowing segments.segmentlist
        return segments.segmentlist(map(segments.segment, span_list))
Example #11
def expand_version_number(engine, segdef):
    ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

    if version != '*':
        return [segdef]

    # Start looking at the full interval
    intervals = segmentlist([segment(start_time, end_time)])

    # Find the maximum version number
    sql  = "SELECT max(version) FROM segment_definer "
    sql += "WHERE  segment_definer.ifos = '%s' " % ifo
    sql += "AND   segment_definer.name = '%s' " % name

    rows    = engine.query(sql)
    try:
        version = len(rows[0]) and rows[0][0] or 1
    except Exception:
        version = None

    results = []

    while version > 0:
        for interval in intervals:
            segs = query_segments(engine, 'segment_summary', [(ifo, name, version, interval[0], interval[1], 0, 0)])

            for seg in segs[0]:
                results.append( (ifo, name, version, seg[0], seg[1], 0, 0) )

        intervals.coalesce()
        intervals -= segs[0]

        version -= 1

    return results
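
The version-resolution idea above in miniature: higher versions claim their intervals first and lower versions only fill what remains. A self-contained sketch with fabricated per-version coverage (ligo-segments API):

from ligo.segments import segment, segmentlist

coverage = {2: segmentlist([segment(10, 20)]),
            1: segmentlist([segment(0, 30)])}
remaining = segmentlist([segment(0, 30)])
results = []
for version in sorted(coverage, reverse=True):
    segs = coverage[version] & remaining
    results.extend((version, s) for s in segs)
    remaining -= segs
print(results)
# [(2, segment(10, 20)), (1, segment(0, 10)), (1, segment(20, 30))]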
Example #12
def fromsegwizard(file, coltype=int, strict=True):
    """
	Read a segmentlist from the file object file containing a segwizard
	compatible segment list.  Parsing stops on the first line that
	cannot be parsed (which is consumed).  The segmentlist will be
	created with segments whose boundaries are of type coltype, which
	should raise ValueError if it cannot convert its string argument.
	Two-column, three-column, and four-column segwizard files are
	recognized, but the entire file must be in the same format, which
	is decided by the first parsed line.  If strict is True and the
	file is in three- or four-column format, then each segment's
	duration is checked against that column in the input file.

	NOTE:  the output is a segmentlist as described by the file;  if
	the segments in the input file are not coalesced or out of order,
	then thusly shall be the output of this function.  It is
	recommended that this function's output be coalesced before use.
	"""
    commentpat = re.compile(r"\s*([#;].*)?\Z", re.DOTALL)
    twocolsegpat = re.compile(r"\A\s*([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
    threecolsegpat = re.compile(
        r"\A\s*([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
    fourcolsegpat = re.compile(
        r"\A\s*([\d]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
    format = None
    l = segments.segmentlist()
    for line in file:
        line = commentpat.split(line)[0]
        if not line:
            continue
        try:
            [tokens] = fourcolsegpat.findall(line)
            num = int(tokens[0])
            seg = segments.segment(map(coltype, tokens[1:3]))
            duration = coltype(tokens[3])
            this_line_format = 4
        except ValueError:
            try:
                [tokens] = threecolsegpat.findall(line)
                seg = segments.segment(map(coltype, tokens[0:2]))
                duration = coltype(tokens[2])
                this_line_format = 3
            except ValueError:
                try:
                    [tokens] = twocolsegpat.findall(line)
                    seg = segments.segment(map(coltype, tokens[0:2]))
                    duration = abs(seg)
                    this_line_format = 2
                except ValueError:
                    break
        if strict:
            if abs(seg) != duration:
                raise ValueError("segment '%s' has incorrect duration" % line)
            if format is None:
                format = this_line_format
            elif format != this_line_format:
                raise ValueError("segment '%s' format mismatch" % line)
        l.append(seg)
    return l
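
The four-column segwizard form the parser recognizes is "index start end duration"; the same pattern in isolation:

import re

fourcolsegpat = re.compile(
    r"\A\s*([\d]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s+([\d.+-eE]+)\s*\Z")
print(fourcolsegpat.findall("1 815901601 815902177 576"))
# [('1', '815901601', '815902177', '576')]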
Example #13
def S2playground(extent):
    """
	Return a segmentlist identifying the S2 playground times within the
	interval defined by the segment extent.

	Example:

	>>> from pycbc_glue import segments
	>>> S2playground(segments.segment(874000000, 874010000))
	[segment(874000013, 874000613), segment(874006383, 874006983)]
	"""
    lo = int(extent[0])
    lo -= (lo - 729273613) % 6370
    hi = int(extent[1]) + 1
    return segments.segmentlist(
        segments.segment(t, t + 600)
        for t in range(lo, hi, 6370)) & segments.segmentlist([extent])
Example #14
def run_query_segments(doc, proc_id, engine, gps_start_time, gps_end_time, included_segments_string, excluded_segments_string = None, write_segments = True, start_pad = 0, end_pad = 0):
    """Runs a segment query.  This was originally part of ligolw_query_segments, but now is also
    used by ligolw_segments_from_cats.

    The write_segments option is provided so callers can coalesce segments obtained over
    several invocations (as segments_from_cats does).
    """
    
    if write_segments:
        all_ifos = {}

        for ifo, segment_name, version in split_segment_ids(included_segments_string.split(',')):
            all_ifos[ifo] = True


        new_seg_def_id = add_to_segment_definer(doc, proc_id, ''.join(all_ifos.keys()), 'result', 0)
        add_to_segment_summary(doc, proc_id, new_seg_def_id, [[gps_start_time, gps_end_time]])

    result = segmentlist([])

    for ifo, segment_name, version in split_segment_ids(included_segments_string.split(',')):
        sum_segments, seg_segments = build_segment_list(engine, gps_start_time, gps_end_time, ifo, segment_name, version, start_pad, end_pad)
        seg_def_id                 = add_to_segment_definer(doc, proc_id, ifo, segment_name, version)

        add_to_segment_summary(doc, proc_id, seg_def_id, sum_segments)

        # and accumulate segments 
        result |= seg_segments

    # Excluded segments are not required
    if excluded_segments_string:
        excluded_segments = segmentlist([])

        for ifo, segment_name, version in split_segment_ids(excluded_segments_string.split(',')):
            sum_segments, seg_segments = build_segment_list(engine, gps_start_time, gps_end_time, ifo, segment_name, version)
            excluded_segments |= seg_segments

        result = result - excluded_segments

    result.coalesce()
    
    # Add the segments
    if write_segments:
        add_to_segment(doc, proc_id, new_seg_def_id, result)

    return result
Example #15
def get_segment_summary_times(scienceFile, segmentName):
    """
    This function will find the times for which the segment_summary is set
    for the flag given by segmentName.

    Parameters
    -----------
    scienceFile : SegFile
        The segment file that we want to use to determine this.
    segmentName : string
        The DQ flag to search for times in the segment_summary table.

    Returns
    ---------
    summSegList : glue.segments.segmentlist
        The times that are covered in the segment summary table.
    """
    # Parse the segmentName
    segmentName = segmentName.split(':')
    if not len(segmentName) in [2, 3]:
        raise ValueError("Invalid channel name %s." % (segmentName))
    ifo = segmentName[0]
    channel = segmentName[1]
    version = ''
    if len(segmentName) == 3:
        version = int(segmentName[2])

    # Load the filename
    xmldoc = utils.load_filename(
        scienceFile.cache_entry.path,
        gz=scienceFile.cache_entry.path.endswith("gz"),
        contenthandler=ContentHandler)

    # Get the segment_def_id for the segmentName
    segmentDefTable = table.get_table(xmldoc, "segment_definer")
    for entry in segmentDefTable:
        if (entry.ifos == ifo) and (entry.name == channel):
            if len(segmentName) == 2 or (entry.version == version):
                segDefID = entry.segment_def_id
                break
    else:
        raise ValueError("Cannot find channel %s in segment_definer table."\
                         %(segmentName))

    # Get the segmentlist corresponding to this segmentName in segment_summary
    segmentSummTable = table.get_table(xmldoc, "segment_summary")
    summSegList = segments.segmentlist([])
    for entry in segmentSummTable:
        if entry.segment_def_id == segDefID:
            segment = segments.segment(entry.start_time, entry.end_time)
            summSegList.append(segment)
    summSegList.coalesce()

    return summSegList
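
The for/else construction above carries the error handling: the else clause runs only when the loop finishes without a break, i.e. when no definer row matched. The idiom in miniature, with fabricated rows:

rows = [("H1", "DMT-SCIENCE", 1), ("L1", "DMT-SCIENCE", 2)]
for row_ifo, row_name, row_version in rows:
    if row_ifo == "H1" and row_name == "DMT-SCIENCE":
        version = row_version
        break
else:
    raise ValueError("channel not found in table")
print(version)   # 1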
Example #16
        def pad_and_truncate(row_start, row_end):
            tmp = segmentlist([segment(row_start + start_pad, row_end + end_pad)])
            # No coalesce needed as a list with a single segment is already coalesced
            tmp &= search_span_list

            # The intersection is guaranteed to be non-empty if the row passed match()
            # PR 2969: The above comment is incorrect.  Negative padding may cause
            # an empty intersection.
            if len(tmp) == 0:
                return segment(0,0)
            else:
                return tmp[0]
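
To see why the guard matters: with negative padding a row can shrink to nothing inside the search span, leaving an empty intersection. A sketch with the ligo-segments API:

from ligo.segments import segment, segmentlist

search_span_list = segmentlist([segment(100, 200)])
row = segment(95 + 0, 102 - 5)        # row (95, 102) with end_pad = -5
tmp = segmentlist([row]) & search_span_list
print(tmp)   # [] -- tmp[0] would raise IndexError without the guard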
Example #17
def segmenttable_get_by_name(xmldoc, name):
	"""
	Retrieve the segmentlists whose name equals name.  The result is a
	segmentlistdict indexed by instrument.

	The output of this function is not coalesced, each segmentlist
	contains the segments as found in the segment table.

	NOTE:  this is a light-weight version of the .get_by_name() method
	of the LigolwSegments class intended for use when the full
	machinery of that class is not required.  Considerably less
	document validation and error checking is performed by this
	version.  Consider using that method instead if your application
	will be interfacing with the document via that class anyway.
	"""
	#
	# find required tables
	#

	def_table = lsctables.SegmentDefTable.get_table(xmldoc)
	seg_table = lsctables.SegmentTable.get_table(xmldoc)

	#
	# segment_def_id --> instrument names mapping but only for
	# segment_definer entries bearing the requested name
	#

	instrument_index = dict((row.segment_def_id, row.instruments) for row in def_table if row.name == name)

	#
	# populate result segmentlistdict object from segment table
	# and index
	#

	instruments = set(instrument for instruments in instrument_index.values() for instrument in instruments)
	result = segments.segmentlistdict((instrument, segments.segmentlist()) for instrument in instruments)

	for row in seg_table:
		if row.segment_def_id in instrument_index:
			seg = row.segment
			for instrument in instrument_index[row.segment_def_id]:
				result[instrument].append(seg)

	#
	# done
	#

	return result
Example #18
def select_segments_by_definer(segment_file, segment_name=None, ifo=None):
    """ Return the list of segments that match the segment name
    
    Parameters
    ----------
    segment_file: str
        path to segment xml file
    
    segment_name: str
        Name of segment
    ifo: str, optional
    
    Returns
    -------
    seg: list of segments
    """
    from pycbc_glue.ligolw.ligolw import LIGOLWContentHandler as h
    lsctables.use_in(h)
    indoc = ligolw_utils.load_filename(segment_file, False, contenthandler=h)
    segment_table = table.get_table(indoc, 'segment')

    seg_def_table = table.get_table(indoc, 'segment_definer')
    def_ifos = seg_def_table.getColumnByName('ifos')
    def_names = seg_def_table.getColumnByName('name')
    def_ids = seg_def_table.getColumnByName('segment_def_id')

    valid_id = []
    for def_ifo, def_name, def_id in zip(def_ifos, def_names, def_ids):
        if ifo and ifo != def_ifo:
            continue
        if segment_name and segment_name != def_name:
            continue
        valid_id += [def_id]

    start = numpy.array(segment_table.getColumnByName('start_time'))
    start_ns = numpy.array(segment_table.getColumnByName('start_time_ns'))
    end = numpy.array(segment_table.getColumnByName('end_time'))
    end_ns = numpy.array(segment_table.getColumnByName('end_time_ns'))
    start, end = start + 1e-9 * start_ns, end + 1e-9 * end_ns
    did = segment_table.getColumnByName('segment_def_id')

    keep = numpy.array([d in valid_id for d in did])
    if sum(keep) > 0:
        return start_end_to_segments(start[keep], end[keep])
    else:
        return segmentlist([])
Example #19
def fromlalcache(cachefile, coltype=int):
    """
	Construct a segmentlist representing the times spanned by the files
	identified in the LAL cache contained in the file object file.  The
	segmentlist will be created with segments whose boundaries are of
	type coltype, which should raise ValueError if it cannot convert
	its string argument.

	Example:

	>>> from pycbc_glue.lal import LIGOTimeGPS
	>>> cache_seglists = fromlalcache(open(filename), coltype = LIGOTimeGPS).coalesce()

	See also:

	pycbc_glue.lal.CacheEntry
	"""
    return segments.segmentlist(
        lal.CacheEntry(l, coltype=coltype).segment for l in cachefile)
Example #20
def from_range_strings(ranges, boundtype=int):
    """
	Parse a list of ranges expressed as strings in the form "value" or
	"first:last" into an equivalent pycbc_glue.segments.segmentlist.  In the
	latter case, an empty string for "first" and(or) "last" indicates a
	(semi)infinite range.  A typical use for this function is in
	parsing command line options or entries in configuration files.

	NOTE:  the output is a segmentlist as described by the strings;  if
	the segments in the input file are not coalesced or out of order,
	then thusly shall be the output of this function.  It is
	recommended that this function's output be coalesced before use.

	Example:

	>>> text = "0:10,35,100:"
	>>> from_range_strings(text.split(","))
	[segment(0, 10), segment(35, 35), segment(100, infinity)]
	"""
    # preallocate segmentlist
    segs = segments.segmentlist([None] * len(ranges))

    # iterate over strings
    for i, rangestr in enumerate(ranges):
        parts = rangestr.split(":")
        if len(parts) == 1:
            parts = boundtype(parts[0])
            segs[i] = segments.segment(parts, parts)
            continue
        if len(parts) != 2:
            raise ValueError(rangestr)
        if parts[0] == "":
            parts[0] = segments.NegInfinity
        else:
            parts[0] = boundtype(parts[0])
        if parts[1] == "":
            parts[1] = segments.PosInfinity
        else:
            parts[1] = boundtype(parts[1])
        segs[i] = segments.segment(parts[0], parts[1])

    # success
    return segs
Example #21
def get_missing_segs_from_frame_file_cache(datafindcaches):
    """
    This function will use os.path.isfile to determine if all the frame files
    returned by the local datafind server actually exist on the disk. This can
    then be used to update the science times if needed.

    Parameters
    -----------
    datafindcaches : OutGroupList
        List of all the datafind output files.

    Returns
    --------
    missingFrameSegs : Dict. of ifo keyed glue.segment.segmentlist instances
        The times corresponding to missing frames found in datafindOuts.
    missingFrames: Dict. of ifo keyed lal.Cache instances
        The list of missing frames
    """
    missingFrameSegs = {}
    missingFrames = {}
    for cache in datafindcaches:
        if len(cache) > 0:
            # Don't bother if these are not file:// urls, assume all urls in
            # one cache file must be the same type
            if not cache[0].scheme == 'file':
                warn_msg = "We have %s entries in the " %(cache[0].scheme,)
                warn_msg += "cache file. I do not check if these exist."
                logging.info(warn_msg)
                continue
            _, currMissingFrames = cache.checkfilesexist(on_missing="warn")
            missingSegs = segments.segmentlist(e.segment \
                                         for e in currMissingFrames).coalesce()
            ifo = cache.ifo
            if ifo not in missingFrameSegs:
                missingFrameSegs[ifo] = missingSegs
                missingFrames[ifo] = lal.Cache(currMissingFrames)
            else:
                missingFrameSegs[ifo].extend(missingSegs)
                # NOTE: This .coalesce probably isn't needed as the segments
                # should be disjoint. If speed becomes an issue maybe remove it?
                missingFrameSegs[ifo].coalesce()
                missingFrames[ifo].extend(currMissingFrames)
    return missingFrameSegs, missingFrames
Example #22
def Fold(seglist1, seglist2):
    """
	An iterator that generates the results of taking the intersection
	of seglist1 with each segment in seglist2 in turn.  In each result,
	the segment start and stop values are adjusted to be with respect
	to the start of the corresponding segment in seglist2.  See also
	the segmentlist_range() function.

	This has use in applications that wish to convert ranges of values
	to ranges relative to epoch boundaries.  Below, a list of time
	intervals in hours is converted to a sequence of daily interval
	lists with times relative to midnight.

	Example:

	>>> from pycbc_glue.segments import *
	>>> x = segmentlist([segment(0, 13), segment(14, 20), segment(22, 36)])
	>>> for y in Fold(x, segmentlist_range(0, 48, 24)): print y
	...
	[segment(0, 13), segment(14, 20), segment(22, 24)]
	[segment(0, 12)]
	"""
    for seg in seglist2:
        yield (seglist1 & segments.segmentlist([seg])).shift(-seg[0])
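
segmentlist_range() is another pycbc_glue helper; the docstring example can be reproduced with an explicit grid of day-long segments (self-contained sketch, ligo-segments API):

from ligo.segments import segment, segmentlist

x = segmentlist([segment(0, 13), segment(14, 20), segment(22, 36)])
days = segmentlist([segment(0, 24), segment(24, 48)])
for day in days:
    print((x & segmentlist([day])).shift(-day[0]))
# [segment(0, 13), segment(14, 20), segment(22, 24)]
# [segment(0, 12)]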
Example #23
  # SCIENCE_OK
  sciokSegFile = segIfoFiles.find_output_with_tag('SCIENCE_OK')
  assert(len(sciokSegFile) == 1)
  sciokSegFile = sciokSegFile[0]
  sciokSegs = sciokSegFile.segmentList
  # SCIENCE_AVAILABLE
  sciavailableSegFile = segIfoFiles.find_output_with_tag('SCIENCE_AVAILABLE')
  assert(len(sciavailableSegFile) == 1)
  sciavailableSegFile = sciavailableSegFile[0]
  sciavailableSegs = sciavailableSegFile.segmentList
  # ANALYSABLE - This one needs to come from inspiral outs
  analysableSegs = insps.get_times_covered_by_files()
 
  # And add these to the output file
  # Start with the segment summary
  summSegs = segments.segmentlist([workflow.analysis_time])
  sci_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo,
                                                    "CBC_DAYHOPE_SCIENCE", 0)
  sciok_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id, ifo,
                                                 "CBC_DAYHOPE_SCIENCE_OK", 0)
  sciavailable_def_id = segmentdb_utils.add_to_segment_definer(outdoc,
                            proc_id, ifo, "CBC_DAYHOPE_SCIENCE_AVAILABLE", 0)
  analysable_def_id = segmentdb_utils.add_to_segment_definer(outdoc, proc_id,
                                            ifo, "CBC_DAYHOPE_ANALYSABLE", 0)
  
  segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id, sciSegs)
  segmentdb_utils.add_to_segment(outdoc, proc_id, sciok_def_id, sciokSegs)
  segmentdb_utils.add_to_segment(outdoc, proc_id, sciavailable_def_id,
                                 sciavailableSegs)
  segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                 analysableSegs)
Example #24
def start_end_to_segments(start, end):
    return segmentlist([segment(s, e) for s, e in zip(start, end)])
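
Usage sketch, with segment and segmentlist assumed in scope as in the other examples:

print(start_end_to_segments([0, 10], [5, 20]))
# [segment(0, 5), segment(10, 20)]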
Example #25
def setup_datafind_workflow(workflow, scienceSegs, outputDir, seg_file=None,
                            tags=None):
    """
    Setup datafind section of the workflow. This section is responsible for
    generating, or setting up the workflow to generate, a list of files that
    record the location of the frame files needed to perform the analysis.
    There could be multiple options here, the datafind jobs could be done at
    run time or could be put into a dag. The subsequent jobs will know
    what was done here from the OutFileList containing the datafind jobs
    (and the Dagman nodes if appropriate).
    For now the only implemented option is to generate the datafind files at
    runtime. This module can also check if the frameFiles actually exist, check
    whether the obtained segments line up with the original ones and update the
    science segments to reflect missing data files.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        The workflow class that stores the jobs that will be run.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse.
    outputDir : path
        All output files written by datafind processes will be written to this
        directory.
    seg_file : SegFile, optional (default=None)
        The file returned by get_science_segments containing the science
        segments and the associated segment_summary. This will
        be used for the segment_summary test and is required if, and only if,
        performing that test.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class to uniqueify
        the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!

    Returns
    --------
    datafindOuts : OutGroupList
        List of all the datafind output files for use later in the pipeline.
    sci_avlble_file : SegFile
        SegFile containing the analysable time after checks in the datafind
        module are applied to the input segment list. For production runs this
        is expected to be equal to the input segment list.
    scienceSegs : Dictionary of ifo keyed glue.segment.segmentlist instances
        This contains the times that the workflow is expected to analyse. If
        the updateSegmentTimes kwarg is given this will be updated to reflect
        any instances of missing data.
    sci_avlble_name : string
        The name with which the analysable time is stored in the
        sci_avlble_file.
    """
    if tags is None:
        tags = []
    logging.info("Entering datafind module")
    make_analysis_dir(outputDir)
    cp = workflow.cp

    # Parse for options in ini file
    datafindMethod = cp.get_opt_tags("workflow-datafind",
                                     "datafind-method", tags)

    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-gaps", tags):
        checkSegmentGaps = cp.get_opt_tags("workflow-datafind",
                                          "datafind-check-segment-gaps", tags)
    else:
        checkSegmentGaps = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-frames-exist", tags):
        checkFramesExist = cp.get_opt_tags("workflow-datafind",
                                          "datafind-check-frames-exist", tags)
    else:
        checkFramesExist = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-summary", tags):
        checkSegmentSummary = cp.get_opt_tags("workflow-datafind",
                                       "datafind-check-segment-summary", tags)
    else:
        checkSegmentSummary = "no_test"

    logging.info("Starting datafind with setup_datafind_runtime_generated")
    if datafindMethod == "AT_RUNTIME_MULTIPLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)

    elif datafindMethod == "FROM_PREGENERATED_LCF_FILES":
        ifos = scienceSegs.keys()
        datafindcaches, datafindouts = \
            setup_datafind_from_pregenerated_lcf_files(cp, ifos,
                                                       outputDir, tags=tags)
    else:
        msg = "Entry datafind-method in [workflow-datafind] does not have "
        msg += "expected value. Valid values are "
        msg += "AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES "
        msg += "AT_RUNTIME_MULTIPLE_CACHES or AT_RUNTIME_SINGLE_CACHES. "
        msg += "Consult the documentation for more info."
        raise ValueError(msg)

    using_backup_server = False
    if datafindMethod == "AT_RUNTIME_MULTIPLE_FRAMES" or \
                                  datafindMethod == "AT_RUNTIME_SINGLE_FRAMES":
        if cp.has_option_tags("workflow-datafind",
                          "datafind-backup-datafind-server", tags):
            using_backup_server = True
            backup_server = cp.get_opt_tags("workflow-datafind",
                                      "datafind-backup-datafind-server", tags)
            cp_new = copy.deepcopy(cp)
            cp_new.set("workflow-datafind",
                                "datafind-ligo-datafind-server", backup_server)
            cp_new.set('datafind', 'urltype', 'gsiftp')
            backup_datafindcaches, backup_datafindouts =\
                setup_datafind_runtime_frames_single_call_perifo(cp_new,
                                             scienceSegs, outputDir, tags=tags)
            backup_datafindouts = datafind_keep_unique_backups(\
                                             backup_datafindouts, datafindouts)
            datafindcaches.extend(backup_datafindcaches)
            datafindouts.extend(backup_datafindouts)

    logging.info("setup_datafind_runtime_generated completed")
    # If we don't have frame files covering all times we can update the science
    # segments.
    if checkSegmentGaps in ['warn','update_times','raise_error']:
        logging.info("Checking science segments against datafind output....")
        newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        logging.info("New segments calculated from data find output.....")
        missingData = False
        for ifo in scienceSegs.keys():
            # If no science segments in input then do nothing
            if not scienceSegs[ifo]:
                msg = "No science segments are present for ifo %s, " %(ifo)
                msg += "the segment metadata indicates there is no analyzable"
                msg += " strain data between the selected GPS start and end "
                msg += "times."
                logging.warning(msg)
                continue
            if ifo not in newScienceSegs:
                msg = "No data frames were found corresponding to the science "
                msg += "segments for ifo %s" %(ifo)
                logging.error(msg)
                missingData = True
                if checkSegmentGaps == 'update_times':
                    scienceSegs[ifo] = segments.segmentlist()
                continue
            missing = scienceSegs[ifo] - newScienceSegs[ifo]
            if abs(missing):
                msg = "From ifo %s we are missing frames covering:" %(ifo)
                msg += "\n%s" % "\n".join(map(str, missing))
                missingData = True
                logging.error(msg)
                if checkSegmentGaps == 'update_times':
                    # Remove missing time, so that we can carry on if desired
                    logging.info("Updating science segments for ifo %s."
                                 %(ifo))
                    scienceSegs[ifo] = scienceSegs[ifo] - missing

        if checkSegmentGaps == 'raise_error' and missingData:
            raise ValueError("Workflow cannot find needed data, exiting.")
        logging.info("Done checking, any discrepancies are reported above.")
    elif checkSegmentGaps == 'no_test':
        pass
    else:
        errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Do all of the frame files that were returned actually exist?
    if checkFramesExist in ['warn','update_times','raise_error']:
        logging.info("Verifying that all frames exist on disk.")
        missingFrSegs, missingFrames = \
                          get_missing_segs_from_frame_file_cache(datafindcaches)
        missingFlag = False
        for ifo in missingFrames.keys():
            # If no data in the input then do nothing
            if not scienceSegs[ifo]:
                continue
            # If using a backup server, does the frame exist remotely?
            if using_backup_server:
                # WARNING: This will be slow, but hopefully it will not occur
                #          for too many frames. This could be optimized if
                #          it becomes necessary.
                new_list = []
                for frame in missingFrames[ifo]:
                    for dfout in datafindouts:
                        dfout_pfns = list(dfout.pfns)
                        dfout_urls = [a.url for a in dfout_pfns]
                        if frame.url in dfout_urls:
                            pfn = dfout_pfns[dfout_urls.index(frame.url)]
                            dfout.removePFN(pfn)
                            if len(dfout.pfns) == 0:
                                new_list.append(frame)
                            else:
                                msg = "Frame %s not found locally. "\
                                                                  %(frame.url,)
                                msg += "Replacing with remote url(s) %s." \
                                           %(str([a.url for a in dfout.pfns]),)
                                logging.info(msg)
                            break
                    else:
                        new_list.append(frame)
                missingFrames[ifo] = new_list
            if missingFrames[ifo]:
                msg = "From ifo %s we are missing the following frames:" %(ifo)
                msg += '\n' + '\n'.join([a.url for a in missingFrames[ifo]])
                missingFlag = True
                logging.error(msg)
            if checkFramesExist == 'update_times':
                # Remove missing times, so that we can carry on if desired
                logging.info("Updating science times for ifo %s." %(ifo))
                scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo]

        if checkFramesExist == 'raise_error' and missingFlag:
            raise ValueError("Workflow cannot find all frames, exiting.")
        logging.info("Finished checking frames.")
    elif checkFramesExist == 'no_test':
        pass
    else:
        errMsg = "checkFramesExist kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Check if there are cases where frames exist, but no entry in the segment
    # summary table are present.
    if checkSegmentSummary in ['warn', 'raise_error']:
        logging.info("Checking the segment summary table against frames.")
        dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        missingFlag = False
        # NOTE: Should this be overrideable in the config file?
        sci_seg_name = "SCIENCE"
        if seg_file is None:
            err_msg = "You must provide the science segments SegFile object "
            err_msg += "if using the datafind-check-segment-summary option."
            raise ValueError(err_msg)
        if seg_file.seg_summ_dict is None:
            err_msg = "The provided science segments SegFile object must "
            err_msg += "contain a valid segment_summary table if using the "
            err_msg += "datafind-check-segment-summary option."
            raise ValueError(err_msg)
        seg_summary_times = seg_file.seg_summ_dict
        for ifo in dfScienceSegs.keys():
            curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name]
            missing = (dfScienceSegs[ifo] & seg_file.valid_segments)
            missing.coalesce()
            missing = missing - curr_seg_summ_times
            missing.coalesce()
            scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo]
            scienceButNotFrame.coalesce()
            missing2 = scienceSegs[ifo] - scienceButNotFrame
            missing2.coalesce()
            missing2 = missing2 - curr_seg_summ_times
            missing2.coalesce()
            if abs(missing):
                msg = "From ifo %s the following times have frames, " %(ifo)
                msg += "but are not covered in the segment summary table."
                msg += "\n%s" % "\n".join(map(str, missing))
                logging.error(msg)
                missingFlag = True
            if abs(missing2):
                msg = "From ifo %s the following times have frames, " %(ifo)
                msg += "are science, and are not covered in the segment "
                msg += "summary table."
                msg += "\n%s" % "\n".join(map(str, missing2))
                logging.error(msg)
                missingFlag = True
        if checkSegmentSummary == 'raise_error' and missingFlag:
            errMsg = "Segment_summary discrepancy detected, exiting."
            raise ValueError(errMsg)
    elif checkSegmentSummary == 'no_test':
        pass
    else:
        errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', "
        errMsg += "'warn', or 'raise_error'."
        raise ValueError(errMsg)

    # Now need to create the file for SCIENCE_AVAILABLE
    sci_avlble_dict = segments.segmentlistdict()
    # NOTE: Should this be overrideable in the config file?
    sci_avlble_name = "SCIENCE_AVAILABLE"
    for ifo in scienceSegs.keys():
        sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo]

    sci_avlble_file = SegFile.from_segment_list_dict('SCIENCE_AVAILABLE',
                            sci_avlble_dict, ifo_list = scienceSegs.keys(),
                            valid_segment=workflow.analysis_time,
                            extension='.xml', tags=tags, directory=outputDir)

    logging.info("Leaving datafind module")
    return FileList(datafindouts), sci_avlble_file, scienceSegs, sci_avlble_name
Exemplo n.º 34
0
def make_grb_segments_plot(wkflow,
                           science_segs,
                           trigger_time,
                           trigger_name,
                           out_dir,
                           coherent_seg=None,
                           fail_criterion=None):

    ifos = wkflow.ifos
    if len(science_segs.keys()) == 0:
        extent = segments.segment(int(wkflow.cp.get("workflow", "start-time")),
                                  int(wkflow.cp.get("workflow", "end-time")))
    else:
        pltpad = [
            science_segs.extent_all()[1] - trigger_time,
            trigger_time - science_segs.extent_all()[0]
        ]
        extent = segments.segmentlist([
            science_segs.extent_all(),
            segments.segment(trigger_time - pltpad[0],
                             trigger_time + pltpad[1])
        ]).extent()

    ifo_colors = {}
    for ifo in ifos:
        ifo_colors[ifo] = ifo_color(ifo)
        if ifo not in science_segs.keys():
            science_segs[ifo] = segments.segmentlist([])

    # Make plot
    fig, subs = plt.subplots(len(ifos), sharey=True)
    plt.xticks(rotation=20, ha='right')
    for sub, ifo in zip(subs, ifos):
        for seg in science_segs[ifo]:
            sub.add_patch(
                Rectangle((seg[0], 0.1),
                          abs(seg),
                          0.8,
                          facecolor=ifo_colors[ifo],
                          edgecolor='none'))
        if coherent_seg:
            if len(science_segs[ifo]) > 0 and \
                    coherent_seg in science_segs[ifo]:
                sub.plot([trigger_time, trigger_time], [0, 1], '-', c='orange')
                sub.add_patch(
                    Rectangle((coherent_seg[0], 0),
                              abs(coherent_seg),
                              1,
                              alpha=0.5,
                              facecolor='orange',
                              edgecolor='none'))
            else:
                sub.plot([trigger_time, trigger_time], [0, 1], ':', c='orange')
                sub.plot([coherent_seg[0], coherent_seg[0]], [0, 1],
                         '--',
                         c='orange',
                         alpha=0.5)
                sub.plot([coherent_seg[1], coherent_seg[1]], [0, 1],
                         '--',
                         c='orange',
                         alpha=0.5)
        else:
            sub.plot([trigger_time, trigger_time], [0, 1], ':k')
        if fail_criterion:
            if len(science_segs[ifo]) > 0:
                style_str = '--'
            else:
                style_str = '-'
            sub.plot([fail_criterion[0], fail_criterion[0]], [0, 1],
                     style_str,
                     c='black',
                     alpha=0.5)
            sub.plot([fail_criterion[1], fail_criterion[1]], [0, 1],
                     style_str,
                     c='black',
                     alpha=0.5)

        sub.set_frame_on(False)
        sub.set_yticks([])
        sub.set_ylabel(ifo, rotation=45)
        sub.set_ylim([0, 1])
        sub.set_xlim([float(extent[0]), float(extent[1])])
        sub.get_xaxis().get_major_formatter().set_useOffset(False)
        sub.get_xaxis().get_major_formatter().set_scientific(False)
        sub.get_xaxis().tick_bottom()
        if sub is subs[-1]:
            sub.tick_params(labelsize=10, pad=1)
        else:
            sub.get_xaxis().set_ticks([])
            sub.get_xaxis().set_ticklabels([])

    xmin, xmax = fig.axes[-1].get_xaxis().get_view_interval()
    ymin, _ = fig.axes[-1].get_yaxis().get_view_interval()
    fig.axes[-1].add_artist(
        Line2D((xmin, xmax), (ymin, ymin), color='black', linewidth=2))
    fig.axes[-1].set_xlabel('GPS Time')

    fig.axes[0].set_title('Science Segments for GRB%s' % trigger_name)
    plt.tight_layout()
    fig.subplots_adjust(hspace=0)

    plot_name = 'GRB%s_segments.png' % trigger_name
    plot_url = 'file://localhost%s/%s' % (out_dir, plot_name)
    fig.savefig('%s/%s' % (out_dir, plot_name))

    return [ifos, plot_name, extent, plot_url]
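
A hedged usage sketch for the plotting helper above; the Workflow instance, the ifo-keyed science segments, and the GPS trigger time and GRB name are all illustrative placeholders, not values taken from this document.

# Hypothetical call; `wkflow` is a pycbc Workflow and `science_segs` an
# ifo-keyed segmentlistdict, both assumed to be set up elsewhere.
ifos, plot_name, extent, plot_url = make_grb_segments_plot(
    wkflow, science_segs, trigger_time=1126259462,
    trigger_name='150914A', out_dir='plots')
# plot_name is then 'GRB150914A_segments.png' inside the 'plots' directory.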
Exemplo n.º 35
0
def start_end_to_segments(start, end):
    return segmentlist([segment(s, e) for s, e in zip(start, end)])
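
A minimal usage sketch, assuming start_end_to_segments (defined above) and the glue-style segment types are in scope; the GPS values are placeholders.

# Illustrative only: pairs parallel start/end arrays into a segmentlist.
starts = [100, 200, 250]
ends   = [150, 260, 300]
segs = start_end_to_segments(starts, ends)
# One segment per (start, end) pair; the list is not coalesced automatically:
segs.coalesce()  # -> [segment(100, 150), segment(200, 300)]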
Exemplo n.º 36
0
def query_segments(engine, table, segdefs):
    # each segdef is a list containing:
    #     ifo, name, version, start_time, end_time, start_pad, end_pad


    # The trivial case: if there's nothing to do, return no time
    if len(segdefs) == 0:
        return [ segmentlist([]) ]

    #
    # For the sake of efficiency we query the database for all the segdefs at
    # once.  This helper constructs the WHERE clause for a single segdef.
    #
    def make_clause(table, segdef):
        ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

        sql = " (segment_definer.ifos = '%s' " % ifo
        sql += "AND segment_definer.name = '%s' " % name
        sql += "AND segment_definer.version = %s " % version
        sql += "AND NOT (%d > %s.end_time OR %s.start_time > %d)) " % (start_time, table, table, end_time)

        return sql

    clauses = [make_clause(table, segdef) for segdef in segdefs]

    sql  = 'SELECT segment_definer.ifos, segment_definer.name, segment_definer.version, '
    sql += ' %s.start_time, %s.end_time ' % (table, table)
    sql += ' FROM segment_definer, %s '   % table
    sql += ' WHERE %s.segment_def_id = segment_definer.segment_def_id AND ' % table

    if engine.__class__ == query_engine.LdbdQueryEngine:
        sql += " %s.segment_def_cdb = segment_definer.creator_db AND " % table
    
    sql += '( ' + ' OR '.join(clauses) + ' )'

    rows = engine.query(sql)

    #
    # The result of a query will be rows of the form
    #    ifo, name, version, start_time, end_time
    #
    # We want to associate each returned row with the segdef it belongs to so that
    # we can apply the correct padding.
    #
    # If segdefs were uniquely specified by (ifo, name, version) this would
    # be easy, but it may happen that we're looking for the same segment definer
    # at multiple disjoint times.  In particular this can happen if the user
    # didn't specify a version number; in that case we might have version 2
    # of some flag defined over multiple disjoint segment_definers.
    #
    results = []

    for segdef in segdefs:
        ifo, name, version, start_time, end_time, start_pad, end_pad = segdef

        search_span      = segment(start_time, end_time)
        search_span_list = segmentlist([search_span])

        # See whether the row belongs to the current segdef.  Name, ifo and version must match
        # and the padded segment must overlap with the range of the segdef.
        def matches(row):
            return ( row[0].strip() == ifo and row[1] == name and int(row[2]) == int(version)
                     and search_span.intersects(segment(row[3] + start_pad, row[4] + end_pad)) )

        # Add the padding.  Segments may extend beyond the time of interest; chop off the excess.
        def pad_and_truncate(row_start, row_end):
            tmp = segmentlist([segment(row_start + start_pad, row_end + end_pad)])
            # No coalesce needed as a list with a single segment is already coalesced
            tmp &= search_span_list

            # The intersection is guaranteed to be non-empty if the row passed matches()
            # PR 2969: The above comment is incorrect.  Negative padding may cause
            # an empty intersection.
            if len(tmp) == 0:
                return segment(0,0)
            else:
                return tmp[0]

        # Build a segment list from the returned segments, padded and truncated.  The segments will
        # not necessarily be disjoint, if the padding crosses gaps.  They are also not guaranteed to
        # be in order, since there's no ORDER BY in the query.  So the list needs to be coalesced
        # before arithmetic can be done with it.
        result  = segmentlist( [pad_and_truncate(row[3], row[4]) for row in rows if matches(row)] ).coalesce()

        # This is not needed: since each of the segments is constrained to be within the search
        # span the whole list must be as well.
        # result &= search_span_list
        
        results.append(result)

    return results
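
For reference, a hedged sketch of assembling segdefs and calling query_segments; the SQLite engine construction is an assumption about the available query_engine backend, and the flag names, database path, and GPS times are placeholders.

# Hypothetical usage sketch; substitute whichever query_engine backend
# (e.g. LdbdQueryEngine) your segment database actually provides.
import sqlite3

segdefs = [
    # ifo, name, version, start_time, end_time, start_pad, end_pad
    ('H1', 'DMT-SCIENCE', 1, 1126051217, 1126137617, 0, 0),
    ('L1', 'DMT-SCIENCE', 1, 1126051217, 1126137617, 8, -8),
]
connection = sqlite3.connect('segments.sqlite')  # placeholder database path
engine = query_engine.SqliteQueryEngine(connection)  # assumed backend
active = query_segments(engine, 'segment', segdefs)
known  = query_segments(engine, 'segment_summary', segdefs)
# active[i] and known[i] line up with segdefs[i]; each list is already coalesced.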
Exemplo n.º 37
0
def make_grb_segments_plot(wkflow, science_segs, trigger_time, trigger_name,
                           out_dir, coherent_seg=None, fail_criterion=None):
    
    ifos = wkflow.ifos
    if len(science_segs.keys()) == 0:
        extent = segments.segment(int(wkflow.cp.get("workflow", "start-time")),
                                  int(wkflow.cp.get("workflow", "end-time")))
    else:
        pltpad = [science_segs.extent_all()[1] - trigger_time,
                  trigger_time - science_segs.extent_all()[0]]
        extent = segments.segmentlist([science_segs.extent_all(),
            segments.segment(trigger_time - pltpad[0],
                             trigger_time + pltpad[1])]).extent()

    ifo_colors = {}
    for ifo in ifos:
        ifo_colors[ifo] = ifo_color(ifo)
        if ifo not in science_segs.keys():
            science_segs[ifo] = segments.segmentlist([])

    # Make plot
    fig, subs = plt.subplots(len(ifos), sharey=True)
    plt.xticks(rotation=20, ha='right')
    for sub, ifo in zip(subs, ifos):
        for seg in science_segs[ifo]:
            sub.add_patch(Rectangle((seg[0], 0.1), abs(seg), 0.8,
                                    facecolor=ifo_colors[ifo], edgecolor='none'))
        if coherent_seg:
            if len(science_segs[ifo]) > 0 and \
                    coherent_seg in science_segs[ifo]:
                sub.plot([trigger_time, trigger_time], [0, 1], '-',
                         c='orange')
                sub.add_patch(Rectangle((coherent_seg[0], 0),
                                        abs(coherent_seg), 1, alpha=0.5,
                                        facecolor='orange', edgecolor='none'))
            else:
                sub.plot([trigger_time, trigger_time], [0, 1], ':',
                         c='orange')
                sub.plot([coherent_seg[0], coherent_seg[0]], [0, 1], '--',
                         c='orange', alpha=0.5)
                sub.plot([coherent_seg[1], coherent_seg[1]], [0, 1], '--',
                         c='orange', alpha=0.5)
        else:
            sub.plot([trigger_time, trigger_time], [0, 1], ':k')
        if fail_criterion:
            if len(science_segs[ifo]) > 0:
                style_str = '--'
            else:
                style_str = '-'
            sub.plot([fail_criterion[0], fail_criterion[0]], [0, 1], style_str,
                     c='black', alpha=0.5)
            sub.plot([fail_criterion[1], fail_criterion[1]], [0, 1], style_str,
                     c='black', alpha=0.5)

        sub.set_frame_on(False)
        sub.set_yticks([])
        sub.set_ylabel(ifo, rotation=45)
        sub.set_ylim([0, 1])
        sub.set_xlim([float(extent[0]), float(extent[1])])
        sub.get_xaxis().get_major_formatter().set_useOffset(False)
        sub.get_xaxis().get_major_formatter().set_scientific(False)
        sub.get_xaxis().tick_bottom()
        if sub is subs[-1]:
            sub.tick_params(labelsize=10, pad=1)
        else:
            sub.get_xaxis().set_ticks([])
            sub.get_xaxis().set_ticklabels([])

    xmin, xmax = fig.axes[-1].get_xaxis().get_view_interval()
    ymin, _ = fig.axes[-1].get_yaxis().get_view_interval()
    fig.axes[-1].add_artist(Line2D((xmin, xmax), (ymin, ymin), color='black',
                                   linewidth=2))
    fig.axes[-1].set_xlabel('GPS Time')

    fig.axes[0].set_title('Science Segments for GRB%s' % trigger_name)
    plt.tight_layout()
    fig.subplots_adjust(hspace=0)
    
    plot_name = 'GRB%s_segments.png' % trigger_name
    plot_url = 'file://localhost%s/%s' % (out_dir, plot_name)
    fig.savefig('%s/%s' % (out_dir, plot_name))

    return [ifos, plot_name, extent, plot_url]
Exemplo n.º 38
0
    # SCIENCE_OK
    sciokSegFile = segIfoFiles.find_output_with_tag('SCIENCE_OK')
    assert (len(sciokSegFile) == 1)
    sciokSegFile = sciokSegFile[0]
    sciokSegs = sciokSegFile.segmentList
    # SCIENCE_AVAILABLE
    sciavailableSegFile = segIfoFiles.find_output_with_tag('SCIENCE_AVAILABLE')
    assert (len(sciavailableSegFile) == 1)
    sciavailableSegFile = sciavailableSegFile[0]
    sciavailableSegs = sciavailableSegFile.segmentList
    # ANALYSABLE - This one needs to come from inspiral outs
    analysableSegs = insps.get_times_covered_by_files()

    # And add these to the output file
    # Start with the segment summary
    summSegs = segments.segmentlist([workflow.analysis_time])
    sci_def_id = segmentdb_utils.add_to_segment_definer(
        outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE", 0)
    sciok_def_id = segmentdb_utils.add_to_segment_definer(
        outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE_OK", 0)
    sciavailable_def_id = segmentdb_utils.add_to_segment_definer(
        outdoc, proc_id, ifo, "CBC_DAYHOPE_SCIENCE_AVAILABLE", 0)
    analysable_def_id = segmentdb_utils.add_to_segment_definer(
        outdoc, proc_id, ifo, "CBC_DAYHOPE_ANALYSABLE", 0)

    segmentdb_utils.add_to_segment(outdoc, proc_id, sci_def_id, sciSegs)
    segmentdb_utils.add_to_segment(outdoc, proc_id, sciok_def_id, sciokSegs)
    segmentdb_utils.add_to_segment(outdoc, proc_id, sciavailable_def_id,
                                   sciavailableSegs)
    segmentdb_utils.add_to_segment(outdoc, proc_id, analysable_def_id,
                                   analysableSegs)
Exemplo n.º 39
0
def coalesce_seg(database, start_time, end_time):
  ret = 0            # assume execution successful

  try:
    st = int(start_time)
    et = int(end_time)
    db = str(database.strip())
    #-------------------------------------------------------------------
    # Set up environment and get needed values
    #-------------------------------------------------------------------
    # Set up connection to the database
    dbconn = DB2.connect(dsn=db, uid='', pwd='', autoCommit=True)
    curs = dbconn.cursor()

    # create a new process_id
    sql = "select hex(GENERATE_UNIQUE()) from sysibm.sysdummy1"
    curs.execute(sql)
    hex_procid = curs.fetchone()[0]
    process_id = 'x' + '\'' + hex_procid + '\''

    # determine the local creator_db
    sql = "SELECT DEFAULT FROM SYSCAT.COLUMNS WHERE "
    sql += "TABNAME = 'PROCESS' AND COLNAME = 'CREATOR_DB'"
    curs.execute(sql)
    creator_db = int(curs.fetchone()[0])

  
    # prepare values for the new row to be inserted into the process table
    program = os.path.abspath(sys.argv[0])
    node = socket.gethostname()
    username = pwd.getpwuid(os.getuid()).pw_name
    unix_procid = os.getpid()
    proc_start_time = gpstime.GpsSecondsFromPyUTC(time.time())
    end_time = None
    jobid = 0
    domain = 'coalesce_local'

    # insert new row into process table
    sql = "INSERT INTO process "
    sql += "(program, is_online, node, username, unix_procid, start_time, jobid, domain, process_id, creator_db) "
    sql += "VALUES ('%s', 0, '%s', '%s', %d, %d, %d, '%s',%s, %d)" % (program, node, username, unix_procid, proc_start_time, jobid, domain, process_id, creator_db)
    curs.execute(sql)

    # get the BLOB process_id for later reference
    sql = "SELECT BLOB(process_id) from process where hex(process_id)='%s' " % hex_procid
    curs.execute(sql)
    blob_procid = curs.fetchone()[0]


    #========================================================================
    #
    #                                Main
    #
    #========================================================================
    # Algorithm:
    # 1. Find the distinct segment types in the segment_summary table within the start_time, end_time range
    # 2. Find segments and intervals to coalesce
    # 3. Coalesce segments and intervals
    # 4. Insert coalesced segments back into the database
    # 5. Delete uncoalesced segments and intervals from the database


    # 1. Find distinct segment types matching our criteria from segment_summary within the specified time range
    sql  = "SELECT distinct(hex(segment_summary.segment_def_id)) FROM segment_summary, segment_definer, process "
    sql += "WHERE segment_summary.segment_def_id=segment_definer.segment_def_id "
    sql += "AND segment_summary.segment_def_cdb=segment_definer.creator_db "
    sql += "AND segment_summary.process_id=process.process_id "
    sql += "AND segment_summary.creator_db=process.creator_db "
    # Removed next line so that all segments are coalesced: this will be slower up front but faster for queries in the long run
    #sql += "AND ((segment_definer.name like 'DMT-%' and segment_definer.version=1) or (process.ifos='V1' and process.program='SegOnline')) "
    sql += "AND segment_summary.start_time <=%d " % et
    sql += "AND segment_summary.end_time >= %d " % st
    curs.execute(sql)
    def_ids = curs.fetchall()
    if not def_ids:
      data_existence = 0
    else:
      data_existence = 1

    # loop over the segment types to fetch, coalesce, insert and delete
    for d in def_ids:
      # get the BLOB segment_def_id for later use 
      sql = "SELECT BLOB(segment_def_id), ifos, name, version, creator_db " 
      sql += "FROM segment_definer " 
      sql += "WHERE hex(segment_def_id) = '%s' " % d[0]

      curs.execute(sql)
      result = curs.fetchone()
      blob_defid = result[0]
      ifos = result[1].strip() 
      name = result[2]
      ver = result[3]
      def_cdb = result[4]

      # 2. Find segments and intervals to coalesce
      # get the segment start_time, end_time to coalesce, and the corresponding primary key to delete
      try:
        curs.execute("drop view seg_view")
      except:
        pass
      sql = "CREATE view seg_view (st,et,seg_id) AS "
      sql += "SELECT start_time,end_time, segment_id from segment "
      sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
      sql += "AND segment.start_time <=%d " % et
      sql += "AND segment.end_time >= %d " % st
      print >> sys.stdout, ("Selecting segments to coalesce for %s version:%d %s ... " % (ifos,ver, name))
      curs.execute(sql)

      curs.execute("SELECT st,et from seg_view")
      seg_bf_cos = curs.fetchall()   # get the segments to coalesce

      # get the summary start_time, end_time to coalesce, and the corresponding primary key to delete
      try:
        curs.execute("drop view sum_view")
      except:
        pass
      sql = "CREATE view sum_view (st,et,sum_id) AS "
      sql += "SELECT start_time,end_time, segment_sum_id from segment_summary "
      sql += "WHERE hex(segment_def_id) = '%s' " % d[0]
      sql += "AND segment_summary.start_time <=%d " % et
      sql += "AND segment_summary.end_time >= %d " % st
      curs.execute(sql)

      curs.execute("SELECT st,et from sum_view")
      sum_bf_cos = curs.fetchall()   # get the summaries to coalesce

      # 3. Coalesce segments and intervals
      print >> sys.stdout, "Coalescing segments ... "
      segs = segments.segmentlist([]) 
      sums = segments.segmentlist([]) 
      for bf in seg_bf_cos:
        seg = segments.segment(int(bf[0]), int(bf[1]))
        segs.append(seg) 
      for bf in sum_bf_cos:
        sum = segments.segment(int(bf[0]), int(bf[1]))
        sums.append(sum) 

      segs.coalesce()
      sums.coalesce()


      # 4. Insert coalesced segments back into the database
      # insert coalesced segs into segment table
      insert_list = []
      for s in segs:
        # generate unique id for insertion
        curs.execute("VALUES BLOB(GENERATE_UNIQUE())")
        prim_id = curs.fetchone()[0]
        # generate a list of values to insert using executemany()
        insert_list.append((prim_id, creator_db, s[0], s[1], blob_defid, def_cdb, blob_procid))

      sql = "INSERT INTO segment "
      sql += "(segment_id, creator_db, start_time, end_time, segment_def_id, segment_def_cdb, process_id) "
      sql += "VALUES (?,?,?,?,?,?,?) "
      print >> sys.stdout, "Inserting coalesced segments back in ... "
      curs.executemany(sql, insert_list)

      # insert coalesced sums into segment_summary table
      insert_list = []
      for s in sums:
        # generate unique id for insertion
        curs.execute("VALUES BLOB(GENERATE_UNIQUE())")
        prim_id = curs.fetchone()[0]
        # generate a list of values to insert using executemany()
        insert_list.append((prim_id, creator_db, s[0], s[1], blob_defid, def_cdb, blob_procid))
      sql = "INSERT INTO segment_summary "
      sql += "(segment_sum_id, creator_db, start_time, end_time, segment_def_id, segment_def_cdb, process_id) "
      sql += "VALUES (?,?,?,?,?,?,?) "
      curs.executemany(sql, insert_list)

      # 5. Delete uncoalesced segments and intervals from the database
      print >> sys.stdout, "Deleting un-coaleseced segments ... "
      print >> sys.stdout 
      sql = "DELETE FROM segment "
      sql += "WHERE segment_id in (select seg_id from seg_view) "
      sql += "AND process_id != %s " % process_id
      curs.execute(sql)

      sql = "DELETE FROM segment_summary "
      sql += "WHERE segment_sum_id in (select sum_id from sum_view) "
      sql += "AND process_id != %s " % process_id
      curs.execute(sql)

    # update end_time in process table
    sql = "update process set end_time=%d where hex(process_id)='%s' " % (gpstime.GpsSecondsFromPyUTC(time.time()),hex_procid)
    curs.execute(sql)
  
    try:  
      curs.execute("drop view seg_view")
      curs.execute("drop view sum_view")
    except:
      pass
    curs.close()

  except Exception,e:
    ret = str(e)
    print >> sys.stdout, ("%s" % ret)
Exemplo n.º 40
0
def setup_datafind_workflow(workflow,
                            scienceSegs,
                            outputDir,
                            seg_file=None,
                            tags=None):
    """
    Setup datafind section of the workflow. This section is responsible for
    generating, or setting up the workflow to generate, a list of files that
    record the location of the frame files needed to perform the analysis.
    There could be multiple options here; the datafind jobs could be run at
    workflow runtime or could be put into a dag. The subsequent jobs will know
    what was done here from the OutFileList containing the datafind jobs
    (and the Dagman nodes if appropriate).
    For now the only implemented option is to generate the datafind files at
    runtime. This module can also check if the frameFiles actually exist, check
    whether the obtained segments line up with the original ones and update the
    science segments to reflect missing data files.

    Parameters
    ----------
    workflow: pycbc.workflow.core.Workflow
        The workflow class that stores the jobs that will be run.
    scienceSegs : Dictionary of ifo-keyed glue.segments.segmentlist instances
        This contains the times that the workflow is expected to analyse.
    outputDir : path
        All output files written by datafind processes will be written to this
        directory.
    seg_file : SegFile, optional (default=None)
        The file returned by get_science_segments containing the science
        segments and the associated segment_summary. This will
        be used for the segment_summary test and is required if, and only if,
        performing that test.
    tags : list of string, optional (default=None)
        Use this to specify tags. This can be used if this module is being
        called more than once to give call specific configuration (by setting
        options in [workflow-datafind-${TAG}] rather than [workflow-datafind]).
        This is also used to tag the Files returned by the class to uniqueify
        the Files and uniqueify the actual filename.
        FIXME: Filenames may not be unique with current codes!

    Returns
    --------
    datafindOuts : OutGroupList
        List of all the datafind output files for use later in the pipeline.
    sci_avlble_file : SegFile
        SegFile containing the analysable time after checks in the datafind
        module are applied to the input segment list. For production runs this
        is expected to be equal to the input segment list.
    scienceSegs : Dictionary of ifo-keyed glue.segments.segmentlist instances
        This contains the times that the workflow is expected to analyse. If
        the updateSegmentTimes kwarg is given this will be updated to reflect
        any instances of missing data.
    sci_avlble_name : string
        The name with which the analysable time is stored in the
        sci_avlble_file.
    """
    if tags is None:
        tags = []
    logging.info("Entering datafind module")
    make_analysis_dir(outputDir)
    cp = workflow.cp

    # Parse for options in ini file
    datafind_method = cp.get_opt_tags("workflow-datafind", "datafind-method",
                                      tags)

    if cp.has_option_tags("workflow-datafind", "datafind-check-segment-gaps",
                          tags):
        checkSegmentGaps = cp.get_opt_tags("workflow-datafind",
                                           "datafind-check-segment-gaps", tags)
    else:
        checkSegmentGaps = "no_test"
    if cp.has_option_tags("workflow-datafind", "datafind-check-frames-exist",
                          tags):
        checkFramesExist = cp.get_opt_tags("workflow-datafind",
                                           "datafind-check-frames-exist", tags)
    else:
        checkFramesExist = "no_test"
    if cp.has_option_tags("workflow-datafind",
                          "datafind-check-segment-summary", tags):
        checkSegmentSummary = cp.get_opt_tags(
            "workflow-datafind", "datafind-check-segment-summary", tags)
    else:
        checkSegmentSummary = "no_test"

    logging.info("Starting datafind with setup_datafind_runtime_generated")
    if datafind_method == "AT_RUNTIME_MULTIPLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafind_method == "AT_RUNTIME_SINGLE_CACHES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_cache_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_multi_calls_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafind_method == "AT_RUNTIME_SINGLE_FRAMES":
        datafindcaches, datafindouts = \
            setup_datafind_runtime_frames_single_call_perifo(cp, scienceSegs,
                                                          outputDir, tags=tags)
    elif datafind_method == "AT_RUNTIME_FAKE_DATA":
        pass
    elif datafind_method == "FROM_PREGENERATED_LCF_FILES":
        ifos = scienceSegs.keys()
        datafindcaches, datafindouts = \
            setup_datafind_from_pregenerated_lcf_files(cp, ifos,
                                                       outputDir, tags=tags)
    else:
        msg = """Entry datafind-method in [workflow-datafind] does not have "
              expected value. Valid values are 
              AT_RUNTIME_MULTIPLE_FRAMES, AT_RUNTIME_SINGLE_FRAMES 
              AT_RUNTIME_MULTIPLE_CACHES, AT_RUNTIME_SINGLE_CACHES,
              FROM_PREGENERATED_LCF_FILES, or AT_RUNTIME_FAKE_DATA.
              Consult the documentation for more info."""
        raise ValueError(msg)

    using_backup_server = False
    if datafind_method == "AT_RUNTIME_MULTIPLE_FRAMES" or \
                                  datafind_method == "AT_RUNTIME_SINGLE_FRAMES":
        if cp.has_option_tags("workflow-datafind",
                              "datafind-backup-datafind-server", tags):
            using_backup_server = True
            backup_server = cp.get_opt_tags("workflow-datafind",
                                            "datafind-backup-datafind-server",
                                            tags)
            cp_new = copy.deepcopy(cp)
            cp_new.set("workflow-datafind", "datafind-ligo-datafind-server",
                       backup_server)
            cp_new.set('datafind', 'urltype', 'gsiftp')
            backup_datafindcaches, backup_datafindouts =\
                setup_datafind_runtime_frames_single_call_perifo(cp_new,
                                             scienceSegs, outputDir, tags=tags)
            backup_datafindouts = datafind_keep_unique_backups(\
                                             backup_datafindouts, datafindouts)
            datafindcaches.extend(backup_datafindcaches)
            datafindouts.extend(backup_datafindouts)

    logging.info("setup_datafind_runtime_generated completed")
    # If we don't have frame files covering all times we can update the science
    # segments.
    if checkSegmentGaps in ['warn', 'update_times', 'raise_error']:
        logging.info("Checking science segments against datafind output....")
        newScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        logging.info("New segments calculated from data find output.....")
        missingData = False
        for ifo in scienceSegs.keys():
            # If no science segments in input then do nothing
            if not scienceSegs[ifo]:
                msg = "No science segments are present for ifo %s, " % (ifo)
                msg += "the segment metadata indicates there is no analyzable"
                msg += " strain data between the selected GPS start and end "
                msg += "times."
                logging.warning(msg)
                continue
            if ifo not in newScienceSegs:
                msg = "No data frames were found corresponding to the science "
                msg += "segments for ifo %s" % (ifo)
                logging.error(msg)
                missingData = True
                if checkSegmentGaps == 'update_times':
                    scienceSegs[ifo] = segments.segmentlist()
                continue
            missing = scienceSegs[ifo] - newScienceSegs[ifo]
            if abs(missing):
                msg = "From ifo %s we are missing frames covering:" % (ifo)
                msg += "\n%s" % "\n".join(map(str, missing))
                missingData = True
                logging.error(msg)
                if checkSegmentGaps == 'update_times':
                    # Remove missing time, so that we can carry on if desired
                    logging.info("Updating science segments for ifo %s." %
                                 (ifo))
                    scienceSegs[ifo] = scienceSegs[ifo] - missing

        if checkSegmentGaps == 'raise_error' and missingData:
            raise ValueError("Workflow cannot find needed data, exiting.")
        logging.info("Done checking, any discrepancies are reported above.")
    elif checkSegmentGaps == 'no_test':
        pass
    else:
        errMsg = "checkSegmentGaps kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Do all of the frame files that were returned actually exist?
    if checkFramesExist in ['warn', 'update_times', 'raise_error']:
        logging.info("Verifying that all frames exist on disk.")
        missingFrSegs, missingFrames = \
                          get_missing_segs_from_frame_file_cache(datafindcaches)
        missingFlag = False
        for ifo in missingFrames.keys():
            # If no data in the input then do nothing
            if not scienceSegs[ifo]:
                continue
            # If using a backup server, does the frame exist remotely?
            if using_backup_server:
                # WARNING: This will be slow, but hopefully it will not occur
                #          for too many frames. This could be optimized if
                #          it becomes necessary.
                new_list = []
                for frame in missingFrames[ifo]:
                    for dfout in datafindouts:
                        dfout_pfns = list(dfout.pfns)
                        dfout_urls = [a.url for a in dfout_pfns]
                        if frame.url in dfout_urls:
                            pfn = dfout_pfns[dfout_urls.index(frame.url)]
                            dfout.removePFN(pfn)
                            if len(dfout.pfns) == 0:
                                new_list.append(frame)
                            else:
                                msg = "Frame %s not found locally. "\
                                                                  %(frame.url,)
                                msg += "Replacing with remote url(s) %s." \
                                           %(str([a.url for a in dfout.pfns]),)
                                logging.info(msg)
                            break
                    else:
                        new_list.append(frame)
                missingFrames[ifo] = new_list
            if missingFrames[ifo]:
                msg = "From ifo %s we are missing the following frames:" % (
                    ifo)
                msg += '\n'.join([a.url for a in missingFrames[ifo]])
                missingFlag = True
                logging.error(msg)
            if checkFramesExist == 'update_times':
                # Remove missing times, so that we can carry on if desired
                logging.info("Updating science times for ifo %s." % (ifo))
                scienceSegs[ifo] = scienceSegs[ifo] - missingFrSegs[ifo]

        if checkFramesExist == 'raise_error' and missingFlag:
            raise ValueError("Workflow cannot find all frames, exiting.")
        logging.info("Finished checking frames.")
    elif checkFramesExist == 'no_test':
        pass
    else:
        errMsg = "checkFramesExist kwarg must take a value from 'no_test', "
        errMsg += "'warn', 'update_times' or 'raise_error'."
        raise ValueError(errMsg)

    # Check if there are cases where frames exist but no corresponding entry
    # is present in the segment summary table.
    if checkSegmentSummary in ['warn', 'raise_error']:
        logging.info("Checking the segment summary table against frames.")
        dfScienceSegs = get_science_segs_from_datafind_outs(datafindcaches)
        missingFlag = False
        # NOTE: Should this be overrideable in the config file?
        sci_seg_name = "SCIENCE"
        if seg_file is None:
            err_msg = "You must provide the science segments SegFile object "
            err_msg += "if using the datafind-check-segment-summary option."
            raise ValueError(err_msg)
        if seg_file.seg_summ_dict is None:
            err_msg = "The provided science segments SegFile object must "
            err_msg += "contain a valid segment_summary table if using the "
            err_msg += "datafind-check-segment-summary option."
            raise ValueError(err_msg)
        seg_summary_times = seg_file.seg_summ_dict
        for ifo in dfScienceSegs.keys():
            curr_seg_summ_times = seg_summary_times[ifo + ":" + sci_seg_name]
            missing = (dfScienceSegs[ifo] & seg_file.valid_segments)
            missing.coalesce()
            missing = missing - curr_seg_summ_times
            missing.coalesce()
            scienceButNotFrame = scienceSegs[ifo] - dfScienceSegs[ifo]
            scienceButNotFrame.coalesce()
            missing2 = scienceSegs[ifo] - scienceButNotFrame
            missing2.coalesce()
            missing2 = missing2 - curr_seg_summ_times
            missing2.coalesce()
            if abs(missing):
                msg = "From ifo %s the following times have frames, " % (ifo)
                msg += "but are not covered in the segment summary table."
                msg += "\n%s" % "\n".join(map(str, missing))
                logging.error(msg)
                missingFlag = True
            if abs(missing2):
                msg = "From ifo %s the following times have frames, " % (ifo)
                msg += "are science, and are not covered in the segment "
                msg += "summary table."
                msg += "\n%s" % "\n".join(map(str, missing2))
                logging.error(msg)
                missingFlag = True
        if checkSegmentSummary == 'raise_error' and missingFlag:
            errMsg = "Segment_summary discrepancy detected, exiting."
            raise ValueError(errMsg)
    elif checkSegmentSummary == 'no_test':
        pass
    else:
        errMsg = "checkSegmentSummary kwarg must take a value from 'no_test', "
        errMsg += "'warn', or 'raise_error'."
        raise ValueError(errMsg)

    # Now need to create the file for SCIENCE_AVAILABLE
    sci_avlble_dict = segments.segmentlistdict()
    # NOTE: Should this be overrideable in the config file?
    sci_avlble_name = "SCIENCE_AVAILABLE"
    for ifo in scienceSegs.keys():
        sci_avlble_dict[ifo + ':' + sci_avlble_name] = scienceSegs[ifo]

    sci_avlble_file = SegFile.from_segment_list_dict(
        'SCIENCE_AVAILABLE',
        sci_avlble_dict,
        ifo_list=scienceSegs.keys(),
        valid_segment=workflow.analysis_time,
        extension='.xml',
        tags=tags,
        directory=outputDir)

    logging.info("Leaving datafind module")
    if datafind_method == "AT_RUNTIME_FAKE_DATA":
        datafindouts = None
    else:
        datafindouts = FileList(datafindouts)

    return datafindouts, sci_avlble_file, scienceSegs, sci_avlble_name
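
To make the option parsing above concrete, here is a hedged sketch of a matching [workflow-datafind] configuration section; the option names are the ones read by this function, but the chosen values and server addresses are placeholders.

; Illustrative [workflow-datafind] section; server addresses are placeholders.
[workflow-datafind]
datafind-method = AT_RUNTIME_SINGLE_FRAMES
datafind-check-segment-gaps = update_times
datafind-check-frames-exist = update_times
datafind-check-segment-summary = warn
datafind-ligo-datafind-server = datafind.example.org:443
; Optional fallback, only honoured by the *_FRAMES methods:
datafind-backup-datafind-server = backup.example.org:443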
Exemplo n.º 41
0
    def find_frame_urls(self,
                        site,
                        frametype,
                        gpsstart,
                        gpsend,
                        match=None,
                        urltype=None,
                        on_gaps="warn"):
        """Find the framefiles for the given type in the [start, end) interval
        frame

        @param site:
            single-character name of site to match
        @param frametype:
            name of frametype to match
        @param gpsstart:
            integer GPS start time of query
        @param gpsend:
            integer GPS end time of query
        @param match:
            regular expression to match against
        @param urltype:
            file scheme to search for (e.g. 'file')
        @param on_gaps:
            what to do when the requested frame isn't found, one of:
                - C{'warn'} (default): print a warning,
                - C{'error'}: raise an L{RuntimeError}, or
                - C{'ignore'}: do nothing

        @type       site: L{str}
        @type  frametype: L{str}
        @type   gpsstart: L{int}
        @type     gpsend: L{int}
        @type      match: L{str}
        @type    urltype: L{str}
        @type    on_gaps: L{str}

        @returns: L{Cache<pycbc_glue.lal.Cache>}

        @raises RuntimeError: if gaps are found and C{on_gaps='error'}
        """
        if on_gaps not in ("warn", "error", "ignore"):
            raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")
        url = ("%s/gwf/%s/%s/%s,%s" %
               (_url_prefix, site, frametype, gpsstart, gpsend))
        # if a URL type is specified append it to the path
        if urltype:
            url += "/%s" % urltype
        # request JSON output
        url += ".json"
        # append a regex if input
        if match:
            url += "?match=%s" % match
        # make query
        response = self._requestresponse("GET", url)
        urllist = decode(response.read())

        out = lal.Cache([
            lal.CacheEntry.from_T050017(x, coltype=self.LIGOTimeGPSType)
            for x in urllist
        ])

        if on_gaps == "ignore":
            return out
        else:
            span = segments.segment(gpsstart, gpsend)
            seglist = segments.segmentlist(e.segment for e in out).coalesce()
            missing = (segments.segmentlist([span]) - seglist).coalesce()
            if span in seglist:
                return out
            else:
                msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
                if on_gaps == "warn":
                    sys.stderr.write("%s\n" % msg)
                    return out
                else:
                    raise RuntimeError(msg)
Exemplo n.º 42
0
    def find_frame_urls(self, site, frametype, gpsstart, gpsend,
                        match=None, urltype=None, on_gaps="warn"):
        """Find the framefiles for the given type in the [start, end) interval
        frame

        @param site:
            single-character name of site to match
        @param frametype:
            name of frametype to match
        @param gpsstart:
            integer GPS start time of query
        @param gpsend:
            integer GPS end time of query
        @param match:
            regular expression to match against
        @param urltype:
            file scheme to search for (e.g. 'file')
        @param on_gaps:
            what to do when the requested frame isn't found, one of:
                - C{'warn'} (default): print a warning,
                - C{'error'}: raise an L{RuntimeError}, or
                - C{'ignore'}: do nothing

        @type       site: L{str}
        @type  frametype: L{str}
        @type   gpsstart: L{int}
        @type     gpsend: L{int}
        @type      match: L{str}
        @type    urltype: L{str}
        @type    on_gaps: L{str}

        @returns: L{Cache<pycbc_glue.lal.Cache>}

        @raises RuntimeError: if gaps are found and C{on_gaps='error'}
        """
        if on_gaps not in ("warn", "error", "ignore"):
            raise ValueError("on_gaps must be 'warn', 'error', or 'ignore'.")
        url = ("%s/gwf/%s/%s/%s,%s"
               % (_url_prefix, site, frametype, gpsstart, gpsend))
        # if a URL type is specified append it to the path
        if urltype:
            url += "/%s" % urltype
        # request JSON output
        url += ".json"
        # append a regex if input
        if match:
            url += "?match=%s" % match
        # make query
        response = self._requestresponse("GET", url)
        urllist  = decode(response.read())

        out = lal.Cache([lal.CacheEntry.from_T050017(x,
                         coltype=self.LIGOTimeGPSType) for x in urllist])

        if on_gaps == "ignore":
            return out
        else:
            span    = segments.segment(gpsstart, gpsend)
            seglist = segments.segmentlist(e.segment for e in out).coalesce()
            missing = (segments.segmentlist([span]) - seglist).coalesce()
            if span in seglist:
                return out
            else:
                msg = "Missing segments: \n%s" % "\n".join(map(str, missing))
                if on_gaps == "warn":
                    sys.stderr.write("%s\n" % msg)
                    return out
                else:
                    raise RuntimeError(msg)
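
A hedged call sketch for find_frame_urls; the connection object, site letter, and frame type are assumptions, but the keyword arguments mirror the signature documented above.

# Hypothetical usage; `conn` stands in for the datafind connection class
# that exposes find_frame_urls in this module.
cache = conn.find_frame_urls('L', 'L1_HOFT_C00',      # site and frametype (assumed)
                             1126051217, 1126137617,  # GPS [start, end)
                             urltype='file',          # keep only file:// URLs
                             on_gaps='error')         # raise on missing coverage
for entry in cache:
    print(entry.url)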
Exemplo n.º 43
0
def vote(seglists, n):
    """
	Given a sequence of segmentlists, returns the intervals during
	which at least n of them intersect.  The input segmentlists must be
	coalesced, the output is coalesced.

	Example:

	>>> from pycbc_glue.segments import *
	>>> w = segmentlist([segment(0, 15)])
	>>> x = segmentlist([segment(5, 20)])
	>>> y = segmentlist([segment(10, 25)])
	>>> z = segmentlist([segment(15, 30)])
	>>> vote((w, x, y, z), 3)
	[segment(10, 20)]

	The sequence of segmentlists is only iterated over once, and the
	segmentlists within it are only iterated over once;  they can all
	be generators.  If there are a total of N segments in M segment
	lists and the final result has L segments the algorithm is O(N M) +
	O(L).
	"""
    # check for no-op

    if n < 1:
        return segments.segmentlist()

    # digest the segmentlists into an ordered sequence of off-on and
    # on-off transitions with the vote count for each transition
    # FIXME:  this generator is declared locally for now, is it useful
    # as a stand-alone generator?

    def pop_min(l):
        # remove and return the smallest value from a list
        val = min(l)
        for i in xrange(len(l) - 1, -1, -1):
            if l[i] is val:
                return l.pop(i)
        assert False  # cannot get here

    def vote_generator(seglists):
        queue = []
        for seglist in seglists:
            segiter = iter(seglist)
            try:
                seg = segiter.next()
            except StopIteration:
                continue
            # put them in so that the smallest boundary is
            # closest to the end of the list
            queue.append((seg[1], -1, segiter))
            queue.append((seg[0], +1, None))
        if not queue:
            return
        queue.sort(reverse=True)
        bound = queue[-1][0]
        votes = 0
        while queue:
            this_bound, delta, segiter = pop_min(queue)
            if this_bound == bound:
                votes += delta
            else:
                yield bound, votes
                bound = this_bound
                votes = delta
            if segiter is not None:
                try:
                    seg = segiter.next()
                except StopIteration:
                    continue
                queue.append((seg[1], -1, segiter))
                queue.append((seg[0], +1, None))
        yield bound, votes

    # compute the cumulative sum of votes, and assemble a segmentlist
    # from the intervals when the vote count is equal to or greater
    # than n

    result = segments.segmentlist()
    votes = 0
    for bound, delta in vote_generator(seglists):
        if delta > 0 and n - delta <= votes < n:
            start = bound
        elif delta < 0 and n <= votes < n - delta:
            result.append(segments.segment(start, bound))
            del start  # detect stops that aren't preceded by starts
        votes += delta
    assert votes == 0  # detect failed cumulative sum

    return result
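
A small sketch of the generator-friendly behaviour the docstring promises: every input list is iterated exactly once, so plain iterators work in place of segmentlists. The import path follows the doctest above.

# Each input is consumed once, so iterators are acceptable inputs.
from pycbc_glue import segments
w = segments.segmentlist([segments.segment(0, 15)])
x = segments.segmentlist([segments.segment(5, 20)])
y = segments.segmentlist([segments.segment(10, 25)])
# At least two of the three lists overlap exactly on [5, 20).
assert vote((iter(w), iter(x), iter(y)), 2) == \
       segments.segmentlist([segments.segment(5, 20)])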