def create_dbCompF(self, connection, diffFunc, compFuncName, window,
        classAcolumns=None, classBcolumns=None):
    """
    Creates a function in the given database connection that allows the given
    diffFunc to be performed on classA and classB on the fly. To ensure data is
    populated correctly, self.neededColumnsA and self.neededColumnsB are set to
    whatever the order of the columns is in the database. This can be
    overridden using classAcolumns and classBcolumns.

    @connection: a connection to a SQLite database
    @diffFunc: the function to use to do comparisons; must be one of the
     functions defined in this class
    @compFuncName: what to call the comparison function in the database; this
     must be unique
    @window: the size of the window to use when determining whether or not
     classA and classB are the same
    @classAcolumns: set what columns will be used in the function, and the
     order in which they will be passed. Use this if not all of the columns in
     classA's table will be used.
    @classBcolumns: set what columns will be used in the function, and the
     order in which they will be passed. Use this if not all of the columns in
     classB's table will be used.
    """
    self.set_diffFunc(diffFunc)
    self.set_window(window)
    # use the database's column ordering unless explicit column lists were given
    if classAcolumns is None:
        classAcolumns = sqlutils.get_column_names_from_table(connection,
            self.classA.tableName)
    if classBcolumns is None:
        classBcolumns = sqlutils.get_column_names_from_table(connection,
            self.classB.tableName)
    if self.diffFunc == self.eThincaSim or self.diffFunc == self.eThincaSngl:
        # the eThinca comparisons need every column
        self.set_neededColumnsA(classAcolumns)
        self.set_neededColumnsB(classBcolumns)
    else:
        # figure out what columns are needed
        if self.matchCriteriaA == 'startTime':
            neededColumnsA = [col for col in classAcolumns
                if re.search('ifo|start_time', col) is not None]
        elif self.matchCriteriaA == 'endTime':
            neededColumnsA = [col for col in classAcolumns
                if re.search('ifo|end_time', col) is not None]
        else:
            neededColumnsA = [col for col in classAcolumns
                if re.search(self.matchCriteriaA, col) is not None]
        self.set_neededColumnsA(neededColumnsA)
        if self.matchCriteriaB == 'startTime':
            neededColumnsB = [col for col in classBcolumns
                if re.search('ifo|start_time', col) is not None]
        elif self.matchCriteriaB == 'endTime':
            neededColumnsB = [col for col in classBcolumns
                if re.search('ifo|end_time', col) is not None]
        else:
            neededColumnsB = [col for col in classBcolumns
                if re.search(self.matchCriteriaB, col) is not None]
        self.set_neededColumnsB(neededColumnsB)
    connection.create_function(compFuncName,
        len(self.neededColumnsA) + len(self.neededColumnsB), self.dbWrapper)
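# A minimal usage sketch of create_dbCompF, hedged: the row classes are assumed
# to come from createDataRowClass (as in dbinjfind below), and the table names,
# function name, and window below are illustrative assumptions only.
#
#   compF = CompareDataRows(SimDataRow, RecDataRow)
#   compF.set_matchCriteriaA('endTime')
#   compF.set_matchCriteriaB('endTime')
#   compF.create_dbCompF(connection, compF.diffSimSngl, 'end_times_match', 1.0)
#   # the registered SQL function takes the needed columns of both rows, in
#   # order, and reports whether the two rows agree to within the window
#   simcols = ','.join('sim.%s' % col for col in compF.neededColumnsA)
#   reccols = ','.join('rec.%s' % col for col in compF.neededColumnsB)
#   sqlquery = """
#       SELECT sim.simulation_id, rec.event_id
#       FROM sim_inspiral AS sim, sngl_inspiral AS rec
#       WHERE end_times_match(%s, %s)""" % (simcols, reccols)
#   matches = connection.cursor().execute(sqlquery).fetchall()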
def __init__(self, connection, tableName, baseClass=None):
    self.connection = connection
    # store the table name; it is used to build the row class below
    self.tableName = tableName
    self.columns = sqlutils.get_column_names_from_table(self.connection, tableName)
    self.rowClass = createDataRowClass(self.tableName, baseClass, self.columns)
    self._function = None
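# A minimal construction sketch for the wrapper whose __init__ is shown above;
# "TableData" stands in for the real (unshown) class name, and the database
# path and table name are illustrative only.
#
#   import sqlite3
#   connection = sqlite3.connect('triggers.sqlite')
#   wrapper = TableData(connection, 'sngl_inspiral')
#   print wrapper.columns      # column names in database order
#   row = wrapper.rowClass()   # empty instance of the generated row class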
def get_sngl_info(connection, summary_table, sngl_table,
        daily_ihope_pages_location='https://ldas-jobs.ligo.caltech.edu/~cbc/ihope_daily',
        verbose=False):
    from pylal import ligolw_sqlutils as sqlutils

    if verbose:
        print >> sys.stderr, "Getting Sngl table info..."

    # index the summary table
    lcindex = dict([ [row.coinc_event_id, row] for row in summary_table ])

    # get sngl table columns
    sngl_table_cols = sqlutils.get_column_names_from_table(connection, sngl_table)

    # create an object to store the info
    import copy
    class MergedTable(table.Table):
        tableName = summary_table.tableName
        validcolumns = copy.deepcopy(summary_table.validcolumns)
        for sngl_col in sngl_table_cols:
            validcolumns['sngl_'+sngl_col] = sqlutils.get_col_type(sngl_table, sngl_col)
        validcolumns['sngl_event_time_utc__Px_click_for_daily_ihope_xP_'] = 'lstring'
        validcolumns['sngl_ifo__Px_click_for_elog_xP_'] = 'lstring'

    class Merged(object):
        __slots__ = MergedTable.validcolumns.keys()
        _effective_snr = None
        _new_snr = None

        def get_sngl_gps_time(self):
            if 'sngl_start_time' in self.__slots__:
                return self.sngl_start_time
            elif 'sngl_end_time' in self.__slots__:
                return self.sngl_end_time
            else:
                raise AttributeError, "could not find a sngl_start_time or sngl_end_time"

        @property
        def new_snr(self):
            if not self._new_snr:
                self._new_snr = lsctables.SnglInspiral.get_new_snr(self)
            return self._new_snr

        @new_snr.setter
        def new_snr(self, value):
            err_msg = "The new_snr property cannot be set directly."
            raise ValueError(err_msg)

        @property
        def effective_snr(self):
            if not self._effective_snr:
                self._effective_snr = \
                    lsctables.SnglInspiral.get_effective_snr(self)
            return self._effective_snr

        @effective_snr.setter
        def effective_snr(self, value):
            err_msg = "The effective_snr property cannot be set directly."
            raise ValueError(err_msg)

        def get_pyvalue(self):
            return printutils.generic_get_pyvalue(self)

    # connect the row to the table
    MergedTable.RowType = Merged

    #
    # create a document
    #
    mtable = lsctables.New(MergedTable)

    # collect the coinc_event_ids of interest in a temporary table
    sqlquery = "CREATE TEMP TABLE select_ceids (coinc_event_id)"
    connection.cursor().execute(sqlquery)
    sqlquery = "INSERT INTO select_ceids (coinc_event_id) VALUES (?)"
    connection.cursor().executemany(sqlquery,
        [(row.coinc_event_id,) for row in summary_table])

    sqlquery = ''.join(["""
        SELECT
            select_ceids.coinc_event_id, """, sngl_table, """.*
        FROM
            """, sngl_table, '''
        JOIN
            select_ceids, coinc_event_map
        ON
            select_ceids.coinc_event_id == coinc_event_map.coinc_event_id AND
            coinc_event_map.table_name == "''', sngl_table, '''" AND
            coinc_event_map.event_id == ''', sngl_table, '''.event_id'''])
    if verbose:
        print >> sys.stderr, "SQLite Query Used is:"
        print >> sys.stderr, sqlquery

    for data in connection.cursor().execute(sqlquery).fetchall():
        mrow = Merged()
        data = list(data)
        mrow.coinc_event_id = data.pop(0)
        # set sngl info
        for sngl_col, value in zip(sngl_table_cols, data):
            setattr(mrow, 'sngl_'+sngl_col, value)
        # set coinc info
        [setattr(mrow, col, getattr(lcindex[mrow.coinc_event_id], col))
            for col in lcindex[mrow.coinc_event_id].__slots__
            if col != 'coinc_event_id']
        # set sngl end time utc
        gps_time_utc = format_end_time_in_utc( mrow.get_sngl_gps_time() )
        daily_ihope_address = get_daily_ihope_page(mrow.get_sngl_gps_time(),
            pages_location = daily_ihope_pages_location)
        mrow.sngl_event_time_utc__Px_click_for_daily_ihope_xP_ = create_hyperlink(
            daily_ihope_address, gps_time_utc )
        # set elog page
        mrow.sngl_ifo__Px_click_for_elog_xP_ = create_hyperlink(
            get_elog_page(mrow.sngl_ifo, mrow.get_sngl_gps_time()), mrow.sngl_ifo )
        # add the row
        mtable.append(mrow)

    # delete the select_ceids table
    connection.cursor().execute('DROP TABLE select_ceids')

    return mtable
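# A minimal sketch of driving get_sngl_info, hedged: summary_table is assumed
# to be a coinc-level summary table built elsewhere (e.g. by printsims), and
# the database path, table name, and printed columns are illustrative only.
#
#   import sqlite3
#   connection = sqlite3.connect('ihope_results.sqlite')
#   merged = get_sngl_info(connection, summary_table, 'sngl_inspiral',
#       verbose = True)
#   for row in merged:
#       print row.coinc_event_id, row.sngl_ifo, row.sngl_snr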
def printmissed(connection, simulation_table, recovery_table, map_label, livetime_program, param_name = None, param_ranges = None, exclude_coincs = None, include_only_coincs = None, sim_tag = 'ALLINJ', limit = None, daily_ihope_pages_location = 'https://ldas-jobs.ligo.caltech.edu/~cbc/ihope_daily', verbose = False): from pylal import ligolw_sqlutils as sqlutils from pylal import ligolw_cbc_compute_durations as compute_dur from glue import segments from glue.ligolw import dbtables # Get simulation/recovery tables simulation_table = sqlutils.validate_option(simulation_table) recovery_table = sqlutils.validate_option(recovery_table) # create the get_sim_tag function sim_map = sqlutils.sim_tag_proc_id_mapper( connection ) connection.create_function( 'get_sim_tag', 1, sim_map.get_sim_tag ) # # Create and prepare the CloseMissedTable to store summary information # # Get simulation table column names from database simulation_table_columns = sqlutils.get_column_names_from_table( connection, simulation_table ) column_names = simulation_table_columns + \ ['rank', 'decisive_distance', 'gps_time', 'gps_time_ns', 'injection_time_utc__Px_click_for_daily_ihope_xP_', 'elogs', 'instruments_on', 'veto_def_name', 'mini_followup','omega_scan', 'sim_tag'] # define needed tables class CloseMissedTable(table.Table): tableName = "close_missed_injections:table" validcolumns = {} for col_name in column_names: if 'rank' in col_name: validcolumns[col_name] = "int_4u" elif 'instruments_on' == col_name: validcolumns[col_name] = lsctables.ExperimentTable.validcolumns['instruments'] elif 'veto_def_name' == col_name: validcolumns[col_name] = lsctables.ExperimentSummaryTable.validcolumns['veto_def_name'] elif 'decisive_distance' == col_name: validcolumns[col_name] = sqlutils.get_col_type(simulation_table, 'eff_dist_h') elif 'gps_time' == col_name or 'gps_time_ns' == col_name: validcolumns[col_name] = "int_4s" elif 'sim_tag' == col_name: validcolumns[col_name] = "lstring" else: validcolumns[col_name] = sqlutils.get_col_type(simulation_table, col_name, default = 'lstring') class CloseMissed(object): __slots__ = CloseMissedTable.validcolumns.keys() def get_pyvalue(self): return generic_get_pyvalue(self) # connect the rows to the tables CloseMissedTable.RowType = CloseMissed # create the table cmtable = lsctables.New(CloseMissedTable) # set up sim_rec_map table sqlutils.create_sim_rec_map_table(connection, simulation_table, recovery_table, map_label, None) # # Set table filters # # we force the include/exclude filters to None; will check for excluded/included ifo time # when cycling through the ifo times filter = """ WHERE simulation_id NOT IN ( SELECT sim_id FROM sim_rec_map )""" af = create_filter( connection, simulation_table, param_name = param_name, param_ranges = param_ranges, exclude_coincs = None, include_only_coincs = None, sim_tag = sim_tag, verbose = verbose) af = re.sub(r'experiment_summary[.]sim_proc_id', 'process_id', af) if af != '': filter = '\n'.join([ filter, """ AND""", af]) # get desired instrument times if include_only_coincs is not None: include_times = [on_instruments for on_instruments, type in sqlutils.parse_coinc_options( include_only_coincs, verbose = verbose ).get_coinc_types().items() if 'ALL' in type] if exclude_coincs is not None: exclude_times = [on_instruments for on_instruments, type in sqlutils.parse_coinc_options( exclude_coincs, verbose = verbose ).get_coinc_types().items() if 'ALL' in type] # get the usertags of inspiral jobs in this database sqlquery = """ SELECT value FROM process_params 
WHERE param == "-userTag" GROUP BY value """ usertags = set(usertag[0] for usertag in connection.cursor().execute(sqlquery) ) # Get the single-ifo science segments after CAT-1 vetoes try: if "FULL_DATA" in usertags: tag = "FULL_DATA" else: tag = list(usertags)[0] except IndexError: # This is hacky anyway, so let's just take a guess tag = "FULL_DATA" ifo_segments = compute_dur.get_single_ifo_segments(connection, program_name = livetime_program, usertag = tag) if ifo_segments == {}: raise ValueError, "Cannot find any analysis segments using %s as a livetime program; cannot get missed injections." % livetime_program if verbose: print >> sys.stderr, "Getting all veto category names from the experiment_summary table..." xmldoc = dbtables.get_xml(connection) # get veto_segments veto_segments = compute_dur.get_veto_segments(xmldoc, verbose) # make a dictionary of zerolag "shifts" needed for the get_coinc_segments function zerolag_dict = {} for ifo in ifo_segments: zerolag_dict[ifo] = 0.0 # Cycle over available veto categories for veto_def_name, veto_seg_dict in veto_segments.items(): post_vetoes_ifosegs = ifo_segments - veto_seg_dict # make a dictionary of coincident segments by exclusive on-ifos coinc_segs = compute_dur.get_coinc_segments(post_vetoes_ifosegs, zerolag_dict) # # Get all the on_instrument times and cycle over them # sqlquery = """ SELECT DISTINCT experiment.instruments FROM experiment JOIN experiment_summary ON ( experiment.experiment_id == experiment_summary.experiment_id ) WHERE experiment_summary.veto_def_name == :1 """ for on_instruments in connection.cursor().execute(sqlquery, (veto_def_name,)).fetchall(): on_instruments = lsctables.instrument_set_from_ifos(on_instruments[0]) # check if this on_instruments is desired; if not, skip if include_only_coincs is not None and frozenset(on_instruments) not in include_times: continue if exclude_coincs is not None and frozenset(on_instruments) in exclude_times: continue on_times = coinc_segs[','.join(sorted(on_instruments))] def is_in_on_time(gps_time, gps_time_ns): return gps_time + (1e-9 * gps_time_ns) in on_times connection.create_function('is_in_on_time', 2, is_in_on_time) # add the check for on time to the filter in_this_filter = filter # figure out if simulation_table has end_times or start_times end_or_start = any('end_time' in c for c in simulation_table_columns) and '_end_time' or '_start_time' for instrument in on_instruments: inst_time = instrument.lower()[0] + end_or_start inst_time_ns = inst_time + '_ns' in_this_filter = ''.join([ in_this_filter, '\n\tAND is_in_on_time(', inst_time, ',', inst_time_ns, ')' ]) # # Set up decisive distance argument # def get_decisive_distance( *args ): return sorted(args)[1] connection.create_function('get_decisive_distance', len(on_instruments), get_decisive_distance) decisive_distance = ''.join(['get_decisive_distance(', ','.join(['eff_dist_'+inst.lower()[0] for inst in on_instruments]), ')' ]) # # Initialize ranking. Statistics for ranking are based on decisive distance # if verbose: print >> sys.stderr, "Getting statistics for ranking..." 
ranker = sqlutils.rank_stats(simulation_table, decisive_distance, 'ASC') # add requirement that stats not be found in the sim_rec_table to in_this_filter ranker.populate_stats_list(connection, limit = limit, filter = in_this_filter) connection.create_function( 'rank', 1, ranker.get_rank ) # # Get the Data # sqlquery = ''.join([""" SELECT *, get_sim_tag(process_id), """, decisive_distance, """, rank(""", decisive_distance, """) FROM """, simulation_table, """ """, in_this_filter, """ %s""" % (limit is not None and ''.join(['AND rank(', decisive_distance, ') <= ', str(limit)]) or ''), """ ORDER BY rank(""", decisive_distance, """) ASC """]) if verbose: print >> sys.stderr, "Getting injections..." print >> sys.stderr, "SQLite query used is:" print >> sys.stderr, sqlquery for values in connection.cursor().execute( sqlquery ).fetchall(): cmrow = CloseMissed() [ setattr(cmrow, column, values[ii]) for ii, column in enumerate(simulation_table_columns) ] cmrow.decisive_distance = values[-2] cmrow.rank = values[-1] cmrow.instruments_on = lsctables.ifos_from_instrument_set(on_instruments) cmrow.veto_def_name = veto_def_name cmrow.sim_tag = values[-3] cmrow.mini_followup = None cmrow.omega_scan = None cmrow.gps_time = getattr(cmrow, sorted(on_instruments)[0][0].lower() + end_or_start) cmrow.gps_time_ns = getattr(cmrow, sorted(on_instruments)[0][0].lower() + end_or_start + '_ns') # set elog page elog_pages = [(ifo, get_elog_page(ifo, cmrow.gps_time)) for ifo in on_instruments] cmrow.elogs = ','.join([ create_hyperlink(elog[1], elog[0]) for elog in sorted(elog_pages) ]) # set daily_ihope page injection_time_utc = format_end_time_in_utc( cmrow.gps_time ) daily_ihope_address = get_daily_ihope_page(cmrow.gps_time, pages_location = daily_ihope_pages_location) cmrow.injection_time_utc__Px_click_for_daily_ihope_xP_ = create_hyperlink( daily_ihope_address, injection_time_utc ) # add the row cmtable.append(cmrow) # drop the sim_rec_map table connection.cursor().execute("DROP TABLE sim_rec_map") return cmtable
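# A minimal sketch of driving printmissed, hedged: the database path, table
# names, map label, and livetime program below are illustrative assumptions.
#
#   import sqlite3
#   connection = sqlite3.connect('ihope_results.sqlite')
#   missed = printmissed(connection, 'sim_inspiral', 'coinc_inspiral',
#       map_label = 'insp_nearby', livetime_program = 'inspiral',
#       limit = 10, verbose = True)
#   for row in missed:
#       print row.rank, row.decisive_distance, row.instruments_on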
def printsims(connection, simulation_table, recovery_table, map_label, ranking_stat,
        rank_by, comparison_datatype, sort_by = 'rank', param_name = None,
        param_ranges = None, exclude_coincs = None, include_only_coincs = None,
        sim_tag = 'ALLINJ', rank_range = None, convert_durations = 's',
        daily_ihope_pages_location = 'https://ldas-jobs.ligo.caltech.edu/~cbc/ihope_daily',
        verbose = False):

    from pylal import ligolw_sqlutils as sqlutils

    # check and format options appropriately
    simulation_table = sqlutils.validate_option(simulation_table)
    recovery_table = sqlutils.validate_option(recovery_table)
    ranking_stat = sqlutils.validate_option(ranking_stat)
    rank_by = sqlutils.validate_option(rank_by, lower = False).upper()
    comparison_datatype = sqlutils.validate_option(comparison_datatype)
    convert_durations = sqlutils.validate_option(convert_durations)
    if rank_by == 'MIN':
        rank_by = 'ASC'
    elif rank_by == 'MAX':
        rank_by = 'DESC'
    elif rank_by != 'ASC' and rank_by != 'DESC':
        raise ValueError, 'rank_by must be MAX (or DESC) or MIN (or ASC)'
    if not ranking_stat.startswith(recovery_table):
        ranking_stat = '.'.join([recovery_table, ranking_stat])

    #
    # Set up sim_rec_map table
    #
    if 'sim_rec_map' not in sqlutils.get_tables_in_database( connection ):
        sqlutils.create_sim_rec_map_table(connection, simulation_table,
            recovery_table, map_label, ranking_stat)

    #
    # Initialize ranking. Statistics for ranking are collected from non-injections
    # in the recovery table.
    #
    if verbose:
        print >> sys.stderr, "Getting statistics for ranking..."
    ranker = sqlutils.rank_stats(recovery_table, ranking_stat, rank_by)
    # add requirement that stats not be found in the sim_rec_table to in_this_filter
    rank_filter = ''.join([recovery_table,
        '''.coinc_event_id NOT IN (
            SELECT
                rec_id
            FROM
                sim_rec_map)
        AND
            experiment_summary.datatype == "''', comparison_datatype, '"'])

    # add to the rank filter any other desired filters; note that sim-tag is
    # forced to ALLINJ if not comparing to the simulation datatype
    in_this_filter = create_filter(connection, recovery_table,
        param_name = param_name, param_ranges = param_ranges,
        exclude_coincs = exclude_coincs, include_only_coincs = include_only_coincs,
        sim_tag = comparison_datatype == 'simulation' and sim_tag or 'ALLINJ',
        verbose = False)
    if in_this_filter != '':
        rank_filter = '\n\tAND '.join([ in_this_filter, rank_filter ])

    rank_filter = '\n\t'.join([
        sqlutils.join_experiment_tables_to_coinc_table(recovery_table),
        'WHERE', rank_filter ])

    ranker.populate_stats_list(connection, limit = None, filter = rank_filter)
    connection.create_function( 'rank', 1, ranker.get_rank )

    #
    # Set recovery table filters
    #
    in_this_filter = create_filter(connection, recovery_table,
        param_name = param_name, param_ranges = param_ranges,
        exclude_coincs = exclude_coincs, include_only_coincs = include_only_coincs,
        sim_tag = sim_tag, verbose = verbose)

    # Now apply the filter to the sim_rec_map table: this will delete all sim/rec
    # maps where the simulation id is mapped to a recovered event that falls
    # outside the filter, even if that particular sim/rec map is in the filter.
    # For example, if the filter is recovery_table.combined_far != 0., and there
    # are two entries in the sim_rec_map table sharing the same sim_id:
    #   sim_id:0 | rec_id:0 | rec_id's combined_far = 0.
    #   sim_id:0 | rec_id:1 | rec_id's combined_far = 1.2
    # both entries will get deleted even though the second entry's combined_far
    # is not 0.
if in_this_filter != '': # join the needed tables to in_this_filter in_this_filter = ''.join([ sqlutils.join_experiment_tables_to_coinc_table(recovery_table), "\n WHERE\n\t", in_this_filter ]) sqlscript = ''.join([ """ CREATE TEMP TABLE del_ids AS SELECT sim_id AS del_id FROM sim_rec_map WHERE rec_id NOT IN ( SELECT """, recovery_table, """.coinc_event_id FROM """, recovery_table, """ """, in_this_filter, """ AND experiment_summary.datatype == "simulation" ); DELETE FROM sim_rec_map WHERE sim_id IN ( SELECT del_id FROM del_ids ); DROP TABLE del_ids;""" ]) connection.cursor().executescript(sqlscript) # # Set other needed functions # # establish what units will be converting duration to def convert_duration( duration ): return sqlutils.convert_duration( duration, convert_durations ) connection.create_function( 'convert_duration', 1, convert_duration ) # create the get_sim_tag function sim_map = sqlutils.sim_tag_proc_id_mapper( connection ) connection.create_function( 'get_sim_tag', 1, sim_map.get_sim_tag ) # Get range ranks if rank_range is not None: rank_range_parser = sqlutils.parse_param_ranges( 'rank(sim_rec_map', 'ranking_stat)', rank_range, verbose = verbose ) # # Create and prepare the SelectedFoundTable to store summary information # # Get recovery table and simulation table column names from database simulation_table_columns = sqlutils.get_column_names_from_table( connection, simulation_table ) recovery_table_columns = sqlutils.get_column_names_from_table( connection, recovery_table ) # Get list of column name for injected parameters injected_cols = [] for col in simulation_table_columns: if col == 'simulation_id': injected_cols.append('simulation_id') else: injected_cols.append('injected_'+col) injected_cols.extend(['injected_decisive_distance','injected_gps_time', 'injected_gps_time_ns', 'injected_event_time_utc__Px_click_for_daily_ihope_xP_']) # Get list of column names from the recovery table recovered_cols = [] for col in recovery_table_columns: if col == 'coinc_event_id': recovered_cols.append(u'coinc_event_id') else: recovered_cols.append(u'recovered_'+col) # generate list of column names for the summary table column_names = injected_cols + recovered_cols # add instruments on, duration, mini_followups rankname = 'rank_in_' + comparison_datatype.strip().lower() + '_using_' + ranking_stat.split('.')[-1] durname = ''.join([ 'simulation', u'_duration__Px_', convert_durations, '_xP_' ]) column_names.extend( [ rankname, 'recovered_match_rank', 'instruments_on', 'elogs', durname, 'mini_followup','omega_scan', 'sim_tag' ] ) # # define needed tables # class tmpSimTable(table.Table): tableName = "tmp_sim_table:table" validcolumns = dict([ [col, sqlutils.get_col_type(simulation_table, col)] for col in simulation_table_columns ]) class tmpSim(object): __slots__ = tmpSimTable.validcolumns.keys() def get_gps_time(self, site): if '%s_start_time' % site in self.__slots__: return LIGOTimeGPS(getattr(self, '%s_start_time' % site ), getattr(self, '%s_start_time_ns' % site)) elif '%s_end_time' % site in self.__slots__: return LIGOTimeGPS(getattr(self, '%s_end_time' % site ), getattr(self, '%s_end_time_ns' % site)) else: raise AttributeError, "could not find an injected %s_start_time nor an injected %s_end_time" %(site,site) def get_pyvalue(self): return generic_get_pyvalue(self) class tmpRecTable(table.Table): tableName = "tmp_rec_table:table" validcolumns = dict([ [col, sqlutils.get_col_type(recovery_table, col)] for col in recovery_table_columns ]) class tmpRec(object): __slots__ = 
tmpRecTable.validcolumns.keys() def get_gps_time(self): if 'start_time' in self.__slots__: return LIGOTimeGPS(self.start_time, self.start_time_ns) elif 'end_time' in self.__slots__: return LIGOTimeGPS(self.end_time, self.end_time_ns) else: raise AttributeError, "could not find a recovered start_time or recovered end_time" def get_pyvalue(self): return generic_get_pyvalue(self) class SelectedFoundTable(table.Table): tableName = "selected_found_injections:table" validcolumns = {} for col_name in column_names: if 'rank_in_' in col_name: validcolumns[col_name] = "int_4u" elif '_duration_' in col_name: validcolumns[col_name] = "real_8" elif 'instruments_on' == col_name: validcolumns[col_name] = lsctables.ExperimentTable.validcolumns['instruments'] elif col_name == 'injected_start_time' or col_name == 'injected_start_time_ns': validcolumns[col_name] = "int_4s" elif col_name == 'injected_gps_time' or col_name == 'injected_gps_time_ns': validcolumns[col_name] = "int_4s" elif col_name == 'injected_decisive_distance': validcolumns[col_name] = "real_8" elif 'injected_' in col_name or col_name == 'simulation_id': validcolumns[col_name] = sqlutils.get_col_type(simulation_table, re.sub('injected_', '', col_name)) elif 'recovered_' in col_name or col_name == 'coinc_event_id': validcolumns[col_name] = sqlutils.get_col_type(recovery_table, re.sub('recovered_', '', col_name)) # if custom columns exist in the database, just set them to lstrings else: validcolumns[col_name] = "lstring" # add FAP columns validcolumns['recovered_fap'] = "real_8" validcolumns['recovered_fap_1yr'] = "real_8" class SelectedFound(object): __slots__ = SelectedFoundTable.validcolumns.keys() def set_recovered_gps_time(self, gps_time): if 'recovered_start_time' in self.__slots__: self.recovered_start_time = gps_time.seconds self.recovered_start_time_ns = gps_time.nanoseconds elif 'recovered_end_time' in self.__slots__: self.recovered_end_time = gps_time.seconds self.recovered_end_time_ns = gps_time.nanoseconds else: raise AttributeError, "could not find a recovered_start_time or recovered_end_time" def get_recovered_gps_time(self): if 'recovered_start_time' in self.__slots__: return LIGOTimeGPS(self.recovered_start_time, self.recovered_start_time_ns) elif 'recovered_end_time' in self.__slots__: return LIGOTimeGPS(self.recovered_end_time, self.recovered_end_time_ns) else: raise AttributeError, "could not find a recovered_start_time or recovered_end_time" def set_injected_gps_time(self, site, gps_time): if 'injected_%s_start_time' % site in self.__slots__: setattr(self, 'injected_%s_start_time' % site, gps_time.seconds) setattr(self, 'injected_%s_start_time_ns' % site, gps_time.nanoseconds) elif 'injected_%s_end_time' % site in self.__slots__: setattr(self, 'injected_%s_end_time' % site, gps_time.seconds) setattr(self, 'injected_%s_end_time_ns' % site, gps_time.nanoseconds) else: raise AttributeError, "could not find an injected_%s_start_time nor an injected_%s_end_time" %(site,site) def get_injected_gps_time(self, site): if 'injected_%s_start_time' % site in self.__slots__: return LIGOTimeGPS(getattr(self, 'injected_%s_start_time' % site ), getattr(self, 'injected_%s_start_time_ns' % site)) elif 'injected_%s_end_time' % site in self.__slots__: return LIGOTimeGPS(getattr(self, 'injected_%s_end_time' % site ), getattr(self, 'injected_%s_end_time_ns' % site)) else: raise AttributeError, "could not find an injected_%s_start_time nor an injected_%s_end_time" %(site,site) def get_pyvalue(self): return generic_get_pyvalue(self) # connect the 
rows to the tables tmpSimTable.RowType = tmpSim tmpRecTable.RowType = tmpRec SelectedFoundTable.RowType = SelectedFound # # Get the Data # tmp_sim_table = lsctables.New(tmpSimTable) tmp_rec_table = lsctables.New(tmpRecTable) tmp_sftable = lsctables.New(SelectedFoundTable) prior_sim_id = '' group_rank = None current_match_rank = 1 sqlquery = ''.join([""" SELECT """, simulation_table, """.*, """, recovery_table, """.*, get_sim_tag(experiment_summary.sim_proc_id), rank(sim_rec_map.ranking_stat), NULL AS match_rank, experiment.instruments, convert_duration(experiment_summary.duration), NULL AS mini_followup FROM sim_rec_map JOIN """, ', '.join([simulation_table, recovery_table]), """, experiment, experiment_summary, experiment_map ON ( sim_rec_map.sim_id == """, simulation_table, """.simulation_id AND sim_rec_map.rec_id == """, recovery_table, """.coinc_event_id AND sim_rec_map.rec_id == experiment_map.coinc_event_id AND experiment_map.experiment_summ_id == experiment_summary.experiment_summ_id AND experiment_summary.experiment_id == experiment.experiment_id) ORDER BY sim_rec_map.sim_id, sim_rec_map.ranking_stat """, rank_by]) if verbose: print >> sys.stderr, "Getting coincs..." print >> sys.stderr, "SQLite query used is:" print >> sys.stderr, sqlquery for values in connection.cursor().execute( sqlquery ).fetchall(): # sort the data tmp_sim_row = tmpSim() tmp_rec_row = tmpRec() [ setattr(tmp_sim_row, column, values[ii]) for ii, column in enumerate(simulation_table_columns) ] [ setattr(tmp_rec_row, column, values[ii+1+jj]) for jj, column in enumerate(recovery_table_columns) ] # figure out the rank this_inj_rank = values[-5] this_sim_id = tmp_sim_row.simulation_id this_ranking_stat = getattr(tmp_rec_row, ranking_stat.split('.')[-1]) if this_sim_id == prior_sim_id and this_ranking_stat != prior_ranking_stat: current_match_rank += 1 elif this_sim_id != prior_sim_id: current_match_rank = 1 group_rank = this_inj_rank prior_sim_id = this_sim_id prior_ranking_stat = this_ranking_stat # only store data if the group_rank falls in the desired rank ranges if rank_range and rank_range_parser.group_by_param_range(group_rank) is None: continue on_instruments = lsctables.instrument_set_from_ifos(values[-3]) duration = values[-2] # now that have all the information from this row, create a row for the selected found table sfrow = SelectedFound() # set the ranks setattr(sfrow, rankname, group_rank) sfrow.recovered_match_rank = current_match_rank # set the injected parameters use_this_site = sorted(on_instruments)[0][0].lower() injected_time = tmp_sim_row.get_gps_time( use_this_site ) sfrow.injected_gps_time = injected_time.seconds sfrow.injected_gps_time_ns = injected_time.nanoseconds for col in simulation_table_columns: if col == "simulation_id": sfrow.simulation_id = tmp_sim_row.simulation_id else: setattr(sfrow, 'injected_'+col, getattr( tmp_sim_row, col) ) # set the recovered parameters for col in recovery_table_columns: if col == "coinc_event_id": sfrow.coinc_event_id = tmp_rec_row.coinc_event_id else: setattr(sfrow, 'recovered_'+col, getattr( tmp_rec_row, col) ) # calculate and add faps if sfrow.recovered_combined_far is not None: t_in_s = float(values[-2]) / sqlutils.convert_duration(1, convert_durations) sfrow.recovered_fap = 1 - math.exp(-sqlutils.convert_duration(t_in_s, 'yr') * sfrow.recovered_combined_far) sfrow.recovered_fap_1yr = 1 - math.exp(-sfrow.recovered_combined_far) else: sfrow.recovered_fap = None sfrow.recovered_fap_1yr = None # set elog page elog_pages = [(ifo, get_elog_page(ifo, 
sfrow.injected_gps_time)) for ifo in on_instruments] sfrow.elogs = ','.join([ create_hyperlink(elog[1], elog[0]) for elog in sorted(elog_pages) ]) # set daily_ihope page event_time_utc = format_end_time_in_utc( sfrow.injected_gps_time ) daily_ihope_address = get_daily_ihope_page(sfrow.injected_gps_time, pages_location = daily_ihope_pages_location) sfrow.injected_event_time_utc__Px_click_for_daily_ihope_xP_ = create_hyperlink( daily_ihope_address, event_time_utc ) # set any other info sfrow.instruments_on = ','.join(sorted(on_instruments)) sfrow.injected_decisive_distance = sorted([getattr(sfrow, 'injected_eff_dist_%s' % ifo[0].lower()) for ifo in on_instruments])[1] sfrow.mini_followup = None sfrow.omega_scan = None sfrow.sim_tag = values[-6] setattr(sfrow, durname, duration) # add the row tmp_sftable.append(sfrow) # Re-sort the sftable by rank, recovered_match_rank sftable = lsctables.New(SelectedFoundTable) for sfrow in sorted([ row for row in tmp_sftable ], key = lambda row: getattr(row, sort_by == 'rank' and rankname or sort_by) ): if sfrow.simulation_id not in [row.simulation_id for row in sftable]: sftable.append(sfrow) sftable.extend(sub_row for sub_row in sorted([row for row in tmp_sftable if row.simulation_id == sfrow.simulation_id and row.coinc_event_id != sfrow.coinc_event_id], key = lambda row: row.recovered_match_rank)) # drop the sim_rec_map table connection.cursor().execute("DROP TABLE sim_rec_map") return sftable
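# A minimal sketch of driving printsims, hedged: the database path, table
# names, map label, ranking statistic, and comparison datatype below are
# illustrative assumptions; the rank column name follows the rankname rule
# defined in the function.
#
#   import sqlite3
#   connection = sqlite3.connect('ihope_results.sqlite')
#   found = printsims(connection, 'sim_inspiral', 'coinc_inspiral',
#       map_label = 'insp_nearby', ranking_stat = 'combined_far',
#       rank_by = 'MIN', comparison_datatype = 'slide', verbose = True)
#   for row in found:
#       print row.rank_in_slide_using_combined_far, row.simulation_id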
def dbinjfind(connection, simulation_table, recovery_table, match_criteria, rough_match=None, rejection_criteria=[], rough_rejection=None, verbose=False): # validate simulation_table and recovery_table simulation_table = sqlutils.validate_option(simulation_table) recovery_table = sqlutils.validate_option(recovery_table) # create DataRow classes to store data for each table simColumns = sqlutils.get_column_names_from_table(connection, simulation_table) recColumns = sqlutils.get_column_names_from_table(connection, recovery_table) SimDataRow = dataUtils.createDataRowClass(simulation_table, columns=simColumns) RecDataRow = dataUtils.createDataRowClass(recovery_table, columns=recColumns) # create a temporary table to store the eligible foreground events that can be matched if verbose: print >> sys.stdout, "Getting eligible events..." make_rec_sngls_table(connection, recovery_table) # if using rough match, create an index for it rough_match_test = '' if rough_match is not None: simRough, recRough, winRough = rough_match simRough = sqlutils.validate_option(simRough) recRough = sqlutils.validate_option(recRough) sqlquery = "CREATE INDEX rs_rmtch_idx ON rec_sngls (%s)" % recRough connection.cursor().execute(sqlquery) rough_match_test = "rec_sngls.%s >= sim.%s - %f AND rec_sngls.%s <= sim.%s + %f AND\n" % ( recRough, simRough, winRough, recRough, simRough, winRough) # # Remove triggers that match all_data triggers # if rejection_criteria != []: if verbose: print >> sys.stdout, "Applying rejection test to eligible events..." # If comparing to all_data for rejection, create a temp table of all data events # This rejection test only uses the single-ifo triggers from coincident events sqlquery = ''.join([ ''' CREATE TEMP TABLE all_data_sngls AS SELECT ''', recovery_table, '''.* FROM ''', recovery_table, ''' ''', sqlutils.join_experiment_tables_to_sngl_table(recovery_table), ''' WHERE experiment_summary.datatype == "all_data"''' ]) connection.cursor().execute(sqlquery) rough_test = '' if rough_rejection is not None: rejRough, rejRoughWin = rough_rejection rejRough = sqlutils.validate_option(rejRough) sqlquery = "CREATE INDEX ads_rmtch_idx ON all_data_sngls (%s)" % rejRough connection.cursor().execute(sqlquery) rough_test = "all_data_sngls.%s >= rec_sngls.%s - %f AND all_data_sngls.%s <= rec_sngls.%s + %f AND\n" % ( rejRough, rejRough, rejRoughWin, rejRough, rejRough, rejRoughWin) # cycle over the rejection criteria, creating a function in the database for each rejection_tests = [] for n, (thisFunc, window) in enumerate(rejection_criteria): compF = dataUtils.CompareDataRows(RecDataRow, RecDataRow) funcName = 'matches_all_data%i' % n if thisFunc == 'eThinca': compF.create_dbCompF(connection, compF.eThincaSngl, funcName, window, recColumns, recColumns) else: compF.set_matchCriteriaA(thisFunc) compF.set_matchCriteriaB(thisFunc) compF.create_dbCompF(connection, compF.diffRowARowB, funcName, window, recColumns, recColumns) simSnglCols = ','.join( ['rec_sngls.%s' % (col) for col in compF.neededColumnsA]) allSnglCols = ','.join( ['all_data_sngls.%s' % (col) for col in compF.neededColumnsB]) rejection_tests.append('%s(%s, %s)' % (funcName, simSnglCols, allSnglCols)) # now remove triggers sqlquery = ''.join([ ''' DELETE FROM rec_sngls WHERE EXISTS ( SELECT * FROM all_data_sngls WHERE ''', rough_test, '\nAND '.join(rejection_tests), ')' ]) connection.cursor().execute(sqlquery) connection.commit() # # Determine Sim-Sngl matches # if verbose: print >> sys.stdout, "Applying match criteria to find sim-sngl maps..." 
# cycle over the match criteria, creating a function in the database for each match_tests = [] for n, (simFunc, snglFunc, window) in enumerate(match_criteria): compF = dataUtils.CompareDataRows(SimDataRow, RecDataRow) # set the name of the compare function to use in the database funcName = 'are_match%i' % n # set the functions that get the match criteria from the tables # and create the compare function in the database if simFunc == 'eThinca': compF.create_dbCompF(connection, compF.eThincaSim, funcName, window, simColumns, recColumns) else: compF.set_matchCriteriaA(simFunc) compF.set_matchCriteriaB(snglFunc) compF.create_dbCompF(connection, compF.diffSimSngl, funcName, window, simColumns, recColumns) simCols = ','.join(['sim.%s' % (col) for col in compF.neededColumnsA]) snglCols = ','.join( ['rec_sngls.%s' % (col) for col in compF.neededColumnsB]) match_tests.append('%s(%s, %s)' % (funcName, simCols, snglCols)) # determine matches sqlquery = ''.join([ """ CREATE TEMP TABLE found_inj AS SELECT sim.simulation_id AS sim_id, rec_sngls.event_id AS event_id FROM """, simulation_table, """ AS sim, rec_sngls WHERE sim.process_id == rec_sngls.sim_proc_id AND """, rough_match_test, '\n\t\tAND'.join(match_tests) ]) connection.cursor().execute(sqlquery) connection.commit()
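# For orientation, a hedged sketch of the argument structures dbinjfind
# unpacks; every column name, window, and table name below is illustrative,
# not a recommendation.
#
#   match_criteria = [('endTime', 'endTime', 1.0)]        # (simFunc, snglFunc, window)
#   rough_match = ('geocent_end_time', 'end_time', 10.0)  # (sim col, rec col, window)
#   rejection_criteria = [('endTime', 0.01)]              # (matchFunc, window)
#   rough_rejection = ('end_time', 1.0)                   # (rec col, window)
#   dbinjfind(connection, 'sim_inspiral', 'sngl_inspiral', match_criteria,
#       rough_match = rough_match, rejection_criteria = rejection_criteria,
#       rough_rejection = rough_rejection, verbose = True)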
def dbinjfind( connection, simulation_table, recovery_table, match_criteria, rough_match = None, rejection_criteria = [], rough_rejection = None, verbose = False ): # validate simulation_table and recovery_table simulation_table = sqlutils.validate_option( simulation_table ) recovery_table = sqlutils.validate_option( recovery_table ) # create DataRow classes to store data for each table simColumns = sqlutils.get_column_names_from_table( connection, simulation_table ) recColumns = sqlutils.get_column_names_from_table( connection, recovery_table ) SimDataRow = dataUtils.createDataRowClass( simulation_table, columns = simColumns ) RecDataRow = dataUtils.createDataRowClass( recovery_table, columns = recColumns ) # create a temporary table to store the eligible foreground events that can be matched if verbose: print >> sys.stdout, "Getting eligible events..." make_rec_sngls_table( connection, recovery_table ) # if using rough match, create an index for it rough_match_test = '' if rough_match is not None: simRough, recRough, winRough = rough_match simRough = sqlutils.validate_option( simRough ) recRough = sqlutils.validate_option( recRough ) sqlquery = "CREATE INDEX rs_rmtch_idx ON rec_sngls (%s)" % recRough connection.cursor().execute( sqlquery ) rough_match_test = "rec_sngls.%s >= sim.%s - %f AND rec_sngls.%s <= sim.%s + %f AND\n" %( recRough, simRough, winRough, recRough, simRough, winRough ) # # Remove triggers that match all_data triggers # if rejection_criteria != []: if verbose: print >> sys.stdout, "Applying rejection test to eligible events..." # If comparing to all_data for rejection, create a temp table of all data events # This rejection test only uses the single-ifo triggers from coincident events sqlquery = ''.join([''' CREATE TEMP TABLE all_data_sngls AS SELECT ''', recovery_table, '''.* FROM ''', recovery_table, ''' ''', sqlutils.join_experiment_tables_to_sngl_table( recovery_table ), ''' WHERE experiment_summary.datatype == "all_data"''' ]) connection.cursor().execute(sqlquery) rough_test = '' if rough_rejection is not None: rejRough, rejRoughWin = rough_rejection rejRough = sqlutils.validate_option( rejRough ) sqlquery = "CREATE INDEX ads_rmtch_idx ON all_data_sngls (%s)" % rejRough connection.cursor().execute( sqlquery ) rough_test = "all_data_sngls.%s >= rec_sngls.%s - %f AND all_data_sngls.%s <= rec_sngls.%s + %f AND\n" % ( rejRough, rejRough, rejRoughWin, rejRough, rejRough, rejRoughWin ) # cycle over the rejection criteria, creating a function in the database for each rejection_tests = [] for n,(thisFunc, window) in enumerate(rejection_criteria): compF = dataUtils.CompareDataRows(RecDataRow, RecDataRow) funcName = 'matches_all_data%i' % n # Note: setting the match criteria also sets the needed columns compF.set_matchCriteriaA(thisFunc) compF.set_matchCriteriaB(thisFunc) # need different diff function if using eThinca if thisFunc == 'eThinca': diffFunc = compF.eThincaSngl else: diffFunc = compF.diffRowARowB compF.create_dbCompF(connection, diffFunc, funcName, window) simSnglCols = ','.join(['rec_sngls.%s' %(col) for col in compF.neededColumnsA]) allSnglCols = ','.join(['all_data_sngls.%s' %(col) for col in compF.neededColumnsB]) rejection_tests.append( '%s(%s, %s)' %(funcName, simSnglCols, allSnglCols) ) # now remove triggers sqlquery = ''.join([ ''' DELETE FROM rec_sngls WHERE EXISTS ( SELECT * FROM all_data_sngls WHERE ''', rough_test, '\nAND '.join( rejection_tests ), ')' ]) connection.cursor().execute(sqlquery) connection.commit() # # Determine Sim-Sngl matches # if 
verbose: print >> sys.stdout, "Applying match criteria to find sim-sngl maps..." # cycle over the match criteria, creating a function in the database for each match_tests = [] for n,(simFunc, snglFunc, window) in enumerate(match_criteria): compF = dataUtils.CompareDataRows(SimDataRow, RecDataRow) # set the name of the compare function to use in the database funcName = 'are_match%i' % n compF.set_matchCriteriaA(simFunc) compF.set_matchCriteriaB(snglFunc) # need different diff function if using eThinca if simFunc == 'eThinca': diffFunc = compF.eThincaSim else: diffFunc = compF.diffSimSngl compF.create_dbCompF(connection, diffFunc, funcName, window) simCols = ','.join(['sim.%s'%(col) for col in compF.neededColumnsA]) snglCols = ','.join(['rec_sngls.%s'%(col) for col in compF.neededColumnsB]) match_tests.append( '%s(%s, %s)' %(funcName, simCols, snglCols) ) # determine matches sqlquery = ''.join([""" CREATE TEMP TABLE found_inj AS SELECT sim.simulation_id AS sim_id, rec_sngls.event_id AS event_id FROM """, simulation_table, """ AS sim, rec_sngls WHERE sim.process_id == rec_sngls.sim_proc_id AND """, rough_match_test, '\n\t\tAND'.join( match_tests ) ]) connection.cursor().execute(sqlquery) connection.commit()
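# Both versions of dbinjfind leave behind a temporary found_inj table mapping
# simulation ids to single-event ids; a short sketch of reading it back, using
# the column aliases defined in the CREATE TEMP TABLE query above:
#
#   sqlquery = "SELECT sim_id, event_id FROM found_inj"
#   for sim_id, event_id in connection.cursor().execute(sqlquery):
#       print sim_id, event_id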