def _load_ts_file(filename):
    """Load one gzipped numpy array from *filename*, closing the handle promptly."""
    _file = event.gzopen(filename)
    _ts = np.load(_file)
    _file.close()
    return _ts


def combine_ts(filenames):
    """
    Combine multiple files into a single time-series.

    Assumes filenames have the standard LIGO naming convention:
    ``*-start-dur.suffix``, and that *filenames* are sorted into
    chronological order.

    Returns ``(times, timeseries)``: two parallel lists of arrays, with
    each array consisting of only contiguous data.
    """
    t = np.array([])    # timestamps of the current contiguous stretch
    ts = np.array([])   # sample values of the current contiguous stretch
    times = []          # completed stretches of timestamps
    timeseries = []     # completed stretches of sample values

    # extract "start" and "dur" from names like *-start-dur.suffix
    matchfile = re.compile('.*-([0-9]*)-([0-9]*).*$')

    end = False  # GPS end time of data seen so far; False => no data yet
    for filename in filenames:
        m = matchfile.match(filename)
        (_start, _dur) = (int(m.group(1)), int(m.group(2)))

        if not end or end == _start:
            # first file, or contiguous with the previous one: extend
            end = _start + _dur
            _ts = _load_ts_file(filename)
            ts = np.concatenate((ts, _ts))
            # 1.0* forces float division under Python 2 integer semantics
            t = np.concatenate((t, np.arange(_start, _start + _dur, 1.0 * _dur / len(_ts))))
        else:
            # gap in the data! archive the finished stretch, start a new one
            times.append(t)
            timeseries.append(ts)
            ts = _load_ts_file(filename)
            t = np.arange(_start, _start + _dur, 1.0 * _dur / len(ts))
            end = _start + _dur

    # flush the final stretch (an empty pair if *filenames* was empty,
    # matching the original behavior)
    times.append(t)
    timeseries.append(ts)

    return (times, timeseries)
# --- KDE calibration update for one classifier (fragment of a per-classifier loop;
# --- the loop header is outside this excerpt) ---
# rank / cumulative-clean / cumulative-glitch arrays for this classifier
r, c, g = urocs[classifier]

logger.info(' compute number of samples at each rank')
dc, dg = idq.rcg_to_diff(c, g)  ### get the numbers at each rank

logger.info(' computing KDE for cleans')
kde_cln = idq.kde_pwg(kde, r, dc)  ### compute kde estimate

logger.info(' computing KDE for glitches')
kde_gch = idq.kde_pwg(kde, r, dg)

### write kde points to file
# NOTE(review): np.save onto a gzip handle stores the (kde, kde_*) tuple as a
# pickled object array; readers must unpack it the same way.
kde_cln_name = idq.kdename(output_dir, classifier, ifo, "_cln%s" % usertag, gpsstart - lookback, lookback + stride)
logger.info(' writing %s' % kde_cln_name)
np.save(event.gzopen(kde_cln_name, "w"), (kde, kde_cln))

kde_gch_name = idq.kdename(output_dir, classifier, ifo, "_gch%s" % usertag, gpsstart - lookback, lookback + stride)
logger.info(' writing %s' % kde_gch_name)
np.save(event.gzopen(kde_gch_name, "w"), (kde, kde_gch))

### update cache files
# only advertise the new KDEs when forced or when we have enough samples
# (note the asymmetry: strict > for cleans, >= for glitches)
if opts.force or ((c[-1] > min_num_cln) and (g[-1] >= min_num_gch)):
    logger.info(' adding %s to %s' % (kde_cln_name, kde_cache[classifier].name))
    kde_cache[classifier].append(kde_cln_name)
    logger.info(' adding %s to %s' % (kde_gch_name, kde_cache[classifier].name))
    kde_cache[classifier].append(kde_gch_name)
# For each classifier: build KDE estimates of the rank distributions for clean
# and glitch samples, write them to gzipped .npy files, and (if there are
# enough samples) register the new files in the per-classifier cache.
for classifier in classifiers:
    logger.info('computing KDE pdfs for %s'%classifier)
    # rank / cumulative-clean / cumulative-glitch arrays for this classifier
    r, c, g = urocs[classifier]

    logger.info(' compute number of samples at each rank')
    dc, dg = idq.rcg_to_diff( c, g ) ### get the numbers at each rank

    logger.info(' computing KDE for cleans')
    kde_cln = idq.kde_pwg( kde, r, dc ) ### compute kde estimate

    logger.info(' computing KDE for glitches')
    kde_gch = idq.kde_pwg( kde, r, dg )

    ### write kde points to file
    # each file holds the (evaluation points, kde values) pair as a pickled tuple
    kde_cln_name = idq.kdename(output_dir, classifier, ifo, "_cln%s"%usertag, gpsstart-lookback, lookback+stride)
    logger.info(' writing %s'%kde_cln_name)
    np.save(event.gzopen(kde_cln_name, "w"), (kde, kde_cln))

    kde_gch_name = idq.kdename(output_dir, classifier, ifo, "_gch%s"%usertag, gpsstart-lookback, lookback+stride)
    logger.info(' writing %s'%kde_gch_name)
    np.save(event.gzopen(kde_gch_name, "w"), (kde, kde_gch))

    ### update cache files
    # only advertise the new KDEs when forced or when sample counts are adequate
    # (note the asymmetry: strict > for cleans, >= for glitches)
    if opts.force or ((c[-1] > min_num_cln) and (g[-1] >= min_num_gch)):
        logger.info(' adding %s to %s'%(kde_cln_name, kde_cache[classifier].name))
        kde_cache[classifier].append( kde_cln_name )
        logger.info(' adding %s to %s'%(kde_gch_name, kde_cache[classifier].name))
        kde_cache[classifier].append( kde_gch_name )
    else:
        # files are still written above, just not added to the cache
        logger.warning('WARNING: not enough samples to trust calibration. skipping kde update for %s'%classifier)

#===============================================================================================
# --- load the best-matching KDE pair from a classifier's cache (fragment) ---
# NOTE(review): this `raise` is the body of a guard (`if len(lines) % 2:` in the
# companion version of this code) whose header lies above this excerpt; the
# cache must list cln/gch files in alternating pairs.
raise ValueError( 'there must be an even number of lines in kde_cache for %s' % classifier)
# map each GPS validity range -> its (clean, glitch) KDE filename pair
kde_ranges = {}
for ind in xrange(len(lines) / 2):
    kde_cln_name = lines[2 * ind]       # even lines: clean-sample KDE files
    kde_gch_name = lines[2 * ind + 1]   # odd lines: glitch-sample KDE files
    kde_range = idq.extract_kde_range(kde_cln_name)
    kde_ranges[kde_range] = (kde_cln_name, kde_gch_name)

# pick the range best covering the requested gps time
kde_range = idq.best_range(gps, kde_ranges.keys())
kde_cln_name, kde_gch_name = kde_ranges[kde_range]

# each file holds a pickled (evaluation points, kde values) tuple;
# both files are assumed to share the same evaluation points `kde`
kde_cln_file = event.gzopen(kde_cln_name, 'r')
kde, kde_cln = numpy.load(kde_cln_file)
kde_cln_file.close()

kde_gch_file = event.gzopen(kde_gch_name, 'r')
_, kde_gch = numpy.load(kde_gch_file)
kde_gch_file.close()

### store kdes
# also cache the cumulative forms for later use
kdeD[classifier]['kde'] = kde
kdeD[classifier]['kde_cln'] = kde_cln
kdeD[classifier]['ckde_cln'] = idq.kde_to_ckde(kde_cln)
kdeD[classifier]['kde_gch'] = kde_gch
kdeD[classifier]['ckde_gch'] = idq.kde_to_ckde(kde_gch)
# write merged timeseries file # merged_rank_filename = '%s/%s_idq_%s_rank_%s%d-%d.npy.gz' % ( # opts.output_dir, # opts.ifo, # opts.classifier, # opts.tag, # int(_start), # int(_dur)) merged_rank_filename = idq.gdb_timeseries(opts.output_dir, opts.classifier, opts.ifo, "_rank%s" % opts.tag, int(_start), int(_dur)) if opts.verbose: print "\twriting " + merged_rank_filename np.save(event.gzopen(merged_rank_filename, 'w'), ts) merged_rank_filenames.append(merged_rank_filename) rankfr = idq.gdb_timeseriesgwf(opts.output_dir, opts.classifier, opts.ifo, "_rank%s" % opts.tag, int(_start), int(_dur)) if opts.verbose: print "\twriting " + rankfr idq.timeseries2frame(rankfr, {rank_channame: ts}, _start, _dur / (len(t) - 1)) merged_rank_frames.append(rankfr) # generate and write summary statistics (r_min, r_max, r_mean, r_stdv) = idq.stats_ts(ts) if r_max > max_rank: max_rank = r_max
end = _end # write merged timeseries file # merged_rank_filename = '%s/%s_idq_%s_rank_%s%d-%d.npy.gz' % ( # opts.output_dir, # opts.ifo, # opts.classifier, # opts.tag, # int(_start), # int(_dur)) merged_rank_filename = idq.gdb_timeseries(opts.output_dir, opts.classifier, opts.ifo, "_rank%s"%opts.tag, int(_start), int(_dur)) if opts.verbose: print "\twriting " + merged_rank_filename np.save(event.gzopen(merged_rank_filename, 'w'), ts) merged_rank_filenames.append(merged_rank_filename) rankfr = idq.gdb_timeseriesgwf(opts.output_dir, opts.classifier, opts.ifo, "_rank%s"%opts.tag, int(_start), int(_dur)) if opts.verbose: print "\twriting " + rankfr idq.timeseries2frame( rankfr, {rank_channame:ts}, _start, _dur/(len(t)-1) ) merged_rank_frames.append( rankfr ) # generate and write summary statistics (r_min, r_max, r_mean, r_stdv) = idq.stats_ts(ts) if r_max > max_rank: max_rank = r_max max_rank_segNo = segNo rank_summaries.append([ _start,
# --- load the best-matching KDE pair from a classifier's cache file ---
# NOTE(review): readlines() keeps trailing newlines on each filename —
# presumably stripped elsewhere or tolerated by event.gzopen; verify.
lines = cache.readlines()
# cache must list cln/gch files in alternating pairs
if len(lines)%2:
    raise ValueError('there must be an even number of lines in kde_cache for %s'%classifier)

# map each GPS validity range -> its (clean, glitch) KDE filename pair
kde_ranges = {}
for ind in xrange(len(lines)/2):
    kde_cln_name = lines[2*ind]       # even lines: clean-sample KDE files
    kde_gch_name = lines[2*ind+1]     # odd lines: glitch-sample KDE files
    kde_range = idq.extract_kde_range( kde_cln_name )
    kde_ranges[kde_range] = (kde_cln_name, kde_gch_name)

# pick the range best covering the requested gps time
kde_range = idq.best_range( gps, kde_ranges.keys() )
kde_cln_name, kde_gch_name = kde_ranges[kde_range]

# each file holds a pickled (evaluation points, kde values) tuple;
# both files are assumed to share the same evaluation points `kde`
kde_cln_file = event.gzopen(kde_cln_name, 'r')
kde, kde_cln = numpy.load(kde_cln_file)
kde_cln_file.close()

kde_gch_file = event.gzopen(kde_gch_name, 'r')
_ , kde_gch = numpy.load(kde_gch_file)
kde_gch_file.close()

### store kdes
# also cache the cumulative forms for later use
kdeD[classifier]['kde'] = kde
kdeD[classifier]['kde_cln'] = kde_cln
kdeD[classifier]['ckde_cln'] = idq.kde_to_ckde( kde_cln )
kdeD[classifier]['kde_gch'] = kde_gch
kdeD[classifier]['ckde_gch'] = idq.kde_to_ckde( kde_gch )