def _get_individual_transient_metadata(
        self):
    """ get individual transient metadata from the transient database
    """
    self.log.debug(
        'starting the ``_get_individual_transient_metadata`` method')

    for tran in self.transientIdList:
        sqlQuery = u"""
            select id, followup_id, ra_psf 'ra', dec_psf 'dec', local_designation 'name', ps1_designation, object_classification, local_comments, detection_list_id
            from tcs_transient_objects
            where id = %(tran)s
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )
        if len(rows):
            self.transientsMetadataList.append(rows[0])
        else:
            self.log.warning(
                'could not find transient in database with id %(tran)s' % locals())

    self.log.debug(
        'completed the ``_get_individual_transient_metadata`` method')
    return None
def _count_galaxies_requiring_metadata(
        self):
    """ count galaxies requiring metadata

    **Return:**
        - ``self.total``, ``self.batches`` -- total number of galaxies needing metadata & the number of batches required to be sent to NED
    """
    self.log.info(
        'starting the ``_count_galaxies_requiring_metadata`` method')

    tableName = self.dbTableName
    sqlQuery = u"""
        select count(*) as count from %(tableName)s where master_row = 1 and in_ned is null
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    self.total = rows[0]["count"]
    self.batches = int(self.total / 3000.) + 1

    if self.total == 0:
        self.batches = 0

    self.log.info(
        'completed the ``_count_galaxies_requiring_metadata`` method')
    return self.total, self.batches
def _grab_column_name_map_from_database(
        self):
    """ grab column name map from database

    **Return:**
        - None
    """
    self.log.info(
        'starting the ``_grab_column_name_map_from_database`` method')

    # GRAB THE NAMES OF THE IMPORTANT COLUMNS FROM DATABASE
    sqlQuery = u"""
        select view_name, raColName, decColName, object_type, subTypeColName, objectNameColName, redshiftColName, distanceColName, semiMajorColName, semiMajorToArcsec, table_id, table_name, object_type_accuracy
        from tcs_helper_catalogue_views_info v, tcs_helper_catalogue_tables_info t
        where v.table_id = t.id
    """
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    self.colMaps = {}
    for row in rows:
        self.colMaps[row["view_name"]] = row

    self.log.info(
        'completed the ``_grab_column_name_map_from_database`` method')
    return None
def _get_3000_galaxies_needing_metadata(
        self):
    """ get 3000 galaxies needing metadata

    **Return:**
        - ``len(self.theseIds)`` -- the number of NED IDs returned
    """
    self.log.info(
        'starting the ``_get_3000_galaxies_needing_metadata`` method')

    tableName = self.dbTableName

    # SELECT THE DATA FROM NED TABLE
    self.theseIds = []
    sqlQuery = u"""
        select primary_ned_id from %(tableName)s where master_row = 1 and in_ned is null limit 3000;
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        self.theseIds.append(row["primary_ned_id"])

    self.log.info(
        'completed the ``_get_3000_galaxies_needing_metadata`` method')
    return len(self.theseIds)
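# A minimal driver sketch for the batching workflow above (a sketch only --
# the loop shape is an assumption, not lifted from this module; it relies on
# ``_query_ned_and_add_results_to_database`` setting ``in_ned`` so that
# processed rows drop out of the next 3000-row selection):
#
#     total, batches = self._count_galaxies_requiring_metadata()
#     batchCount = 0
#     while self._get_3000_galaxies_needing_metadata():
#         batchCount += 1
#         self._query_ned_and_add_results_to_database(batchCount)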
def _get_stream_view_infos(self):
    """ get stream view infos

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean _get_stream_view_infos method
        - @review: when complete add logging
    """
    self.log.info("starting the ``_get_stream_view_infos`` method")

    sqlQuery = u"""
        SELECT * FROM crossmatch_catalogues.tcs_helper_catalogue_tables_info
        where legacy_table = 0 and table_name not like "legacy%%" and table_name like "%%stream"
        order by number_of_rows desc;
    """ % locals()
    self.streamInfo = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )

    self.log.info("completed the ``_get_stream_view_infos`` method")
    return None
def _get_transient_metadata_from_database_list(
        self):
    """ get transient metadata from a given workflow list in the transient database
    """
    self.log.debug(
        'starting the ``_get_transient_metadata_from_database_list`` method')

    sqlQuery = self.settings["database settings"][
        "transients"]["transient query"]
    self.transientsMetadataList = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.transientsDbConn,
        log=self.log
    )

    self.log.debug(
        'completed the ``_get_transient_metadata_from_database_list`` method')
    return None
def create_dictionary_of_pessto_marshall_streams(
        self):
    """create dictionary of pessto_marshall_streams

    **Return:**
        - ``dictList`` -- the marshall stream rows as a list of dictionaries

    **Todo**
        - @review: when complete, clean create_dictionary_of_pessto_marshall_streams method
        - @review: when complete add logging
    """
    self.log.info(
        'starting the ``create_dictionary_of_pessto_marshall_streams`` method')

    dictList = []
    tableName = self.dbTableName

    rows = dms.execute_mysql_read_query(
        sqlQuery=self.marshallQuery,
        dbConn=self.pmDbConn,
        log=self.log
    )

    totalCount = len(rows)
    count = 0
    for row in rows:
        count += 1
        if count > 1:
            # CURSOR UP ONE LINE AND CLEAR LINE
            sys.stdout.write("\x1b[1A\x1b[2K")
        print "%(count)s / %(totalCount)s `%(tableName)s` data added to memory" % locals()
        dictList.append(dict(row))

    self.log.info(
        'completed the ``create_dictionary_of_pessto_marshall_streams`` method')
    return dictList
def refresh_voevent_feeds(self):
    """*Refresh all VoEvents in the subscriptionTable - adding feed items to the relevant VoEvent channel tables*

    **Return:**
        - ``None``
    """
    ## > IMPORTS ##
    import dryxPython.webcrawlers as wc
    import dryxPython.mysql as m
    import dryxPython.commonutils as cu
    from VOEventLib import Vutil as vou
    import VOEventLib as vo
    import re
    import sys

    ## >SETTINGS ##

    ## LOGGING HEADER ##
    log.info('<m> STARTING TO REFRESH THE VOEVENT STREAMS FOUND IN ' +
             self.subscriptionTable + '<m>')

    ###########################################################
    # >ACTION(S)                                              #
    ###########################################################
    # CREATE DOWNLOADS DIRECTORY
    cu.dryx_mkdir(self._downloadDirectory)

    # READ THE FEED NAMES AND URLS FROM SUBSCRIPTION TABLE
    sqlQuery = 'SELECT rssFeedSource, rssFeedName, feedURL, dateLastRead, uniqueKeyCols from ' + \
        self.subscriptionTable
    try:
        log.debug("attempting to read the feed names and urls from the %s table" % (
            self.subscriptionTable,))
        feeds = m.execute_mysql_read_query(sqlQuery, self.dbConn, log)
    except Exception, e:
        log.error("could not read the feed names and urls from the %s table - failed with this error: %s" % (
            self.subscriptionTable, str(e),))
        return -1
def refresh_rss_feeds(self):
    """*Refresh all feeds in the subscriptionTable - adding feed items to the relevant RSS channel tables*

    **Return:**
        - ``None``
    """
    ## > IMPORTS ##
    import dryxPython.webcrawlers as wc
    import dryxPython.mysql as m
    import dryxPython.commonutils as cu

    ## >SETTINGS ##

    ## LOGGING HEADER ##
    log.info('<m> STARTING TO REFRESH THE FEEDS FOUND IN ' +
             self.subscriptionTable + '<m>')

    ###########################################################
    # >ACTION(S)                                              #
    ###########################################################
    # CREATE DOWNLOADS DIRECTORY
    cu.dryx_mkdir(self._downloadDirectory)

    # READ THE FEED NAMES AND URLS FROM SUBSCRIPTION TABLE
    sqlQuery = 'SELECT rssFeedName, feedURL, rssFeedSource, dateLastRead, uniqueKeyCols from ' + \
        self.subscriptionTable
    try:
        log.debug("attempting to read feed data from the subscription table: %s" % (
            self.subscriptionTable,))
        feeds = m.execute_mysql_read_query(sqlQuery, self.dbConn, log)
    except Exception, e:
        log.error("could not read feed data from the subscription table: %s - failed with this error: %s" % (
            self.subscriptionTable, str(e),))
        return -1
def set_subscription(self, feedURL, rssFeedName, rssFeedSource, uniqueColumns):
    """*Add an XML subscription to the subscriptionTable*

    **Key Arguments:**
        - ``feedURL`` -- the URL of the XML file for the rss feed channel
        - ``rssFeedName`` -- name of the rss feed channel to be logged in subscription table e.g. BBC News
        - ``rssFeedSource`` -- the top level source e.g. BBC
        - ``uniqueColumns`` -- list of columns to set as unique so that duplicate items are ignored

    **Return:**
        - ``None``
    """
    ## > IMPORTS ##
    import dryxPython.commonutils as cu
    import dryxPython.webcrawlers as wc
    import dryxPython.mysql as m
    import sys

    ## >SETTINGS ##

    ###########################################################
    # >ACTION(S)                                              #
    ###########################################################
    # FIRST CHECK TO MAKE SURE THE FEED IS NOT SUBSCRIBED TO ALREADY
    sqlQuery = """SELECT table_name FROM information_schema.tables WHERE table_schema = DATABASE() AND table_name = '%s';""" % (
        self.subscriptionTable,)
    try:
        log.debug("attempting to check if the %s feed is subscribed to yet" % (
            self.subscriptionTable,))
        rows = m.execute_mysql_read_query(sqlQuery, self.dbConn, log)
    except Exception, e:
        log.error("could not check if the %s feed is subscribed to yet - failed with this error: %s" % (
            self.subscriptionTable, str(e),))
        return -1
def create_master_id(
        self):
    """create master id

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean create_master_id method
        - @review: when complete add logging
    """
    self.log.info('starting the ``create_master_id`` method')

    batchSize = 1250
    t = self.dbTableName

    if "photo" in t:
        totalRows = 500000000
    else:
        sqlQuery = u"""
            select distinct objId from %(t)s where qubMasterFlag = 2
        """ % locals()
        count = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )
        totalRows = len(count)
        count = ""

    total = totalRows
    batches = int(total / batchSize)

    start = 0
    end = 0
    theseBatches = []
    for i in range(batches):
        end = end + batchSize
        if end > total:
            end = total
        start = i * batchSize
        if start > 1:
            # CURSOR UP ONE LINE AND CLEAR LINE
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(end) / float(totalRows)) * 100.
        print "%(end)s / %(totalRows)s (%(percent)1.1f%%) masterFlags updated in %(t)s" % locals()

        # ALWAYS TAKE THE FIRST BATCH - ONCE UPDATED, ROWS DROP OUT OF THE
        # qubMasterFlag = 2 SELECTION
        sqlQuery = u"""
            select distinct objid from %(t)s where qubMasterFlag = 2 limit 0, %(batchSize)s
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

        sqlQuery = ""
        for row in rows:
            objid = row["objid"]
            sqlQuery = sqlQuery + \
                u"""\nupdate %(t)s set qubMasterFlag = 1 where qubPrimaryId = (select * from (SELECT qubPrimaryId FROM %(t)s where objId = %(objid)s order by clean desc limit 1) as alias);
update %(t)s set qubMasterFlag = 0 where objId = %(objid)s and qubMasterFlag != 1;""" % locals()

        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            Force=False
        )

    self.log.info('completed the ``create_master_id`` method')
    return None
def set_master_obj_types(
        self):
    """set master obj types

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean set_master_obj_types method
        - @review: when complete add logging
    """
    self.log.info('starting the ``set_master_obj_types`` method')

    batchSize = 1250
    t = self.dbTableName

    if "photo" in t:
        totalRows = 500000000
        return
    else:
        sqlQuery = u"""
            select distinct objid from %(t)s where objType = "Q"
        """ % locals()
        count = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )
        totalRows = len(count)
        count = ""

    total = totalRows
    batches = int(total / batchSize)

    start = 0
    end = 0
    theseBatches = []
    for i in range(batches):
        end = end + batchSize
        if end > total:
            end = total
        start = i * batchSize
        if start > 1:
            # CURSOR UP ONE LINE AND CLEAR LINE
            sys.stdout.write("\x1b[1A\x1b[2K")
        percent = (float(end) / float(totalRows)) * 100.
        print "%(end)s / %(totalRows)s (%(percent)1.1f%%) master objIds updated in %(t)s" % locals()

        sqlQuery = u"""
            select distinct objid from %(t)s where objType = "Q" limit %(start)s, %(batchSize)s
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

        sqlQuery = ""
        for row in rows:
            objid = row["objid"]
            sqlQuery = sqlQuery + \
                u"""\nupdate %(t)s set objType = "Q" where objId = %(objid)s;""" % locals()

        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            Force=False
        )

    self.log.info('completed the ``set_master_obj_types`` method')
    return None
def _update_sdss_coverage(
        self):
    """ update sdss coverage
    """
    self.log.info('starting the ``_update_sdss_coverage`` method')

    tableName = self.dbTableName

    # SELECT THE LOCATIONS NEEDING TO BE CHECKED
    sqlQuery = u"""
        select primary_ned_id, primaryID, raDeg, decDeg, sdss_coverage from %(tableName)s where sdss_coverage is null and master_row = 1 and in_ned = 1 order by dist_mpc;
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )

    totalCount = len(rows)
    count = 0
    for row in rows:
        count += 1
        if count > 1:
            # CURSOR UP THREE LINES AND CLEAR
            sys.stdout.write("\x1b[1A\x1b[2K")
            sys.stdout.write("\x1b[1A\x1b[2K")
            sys.stdout.write("\x1b[1A\x1b[2K")

        if count > totalCount:
            count = totalCount
        percent = (float(count) / float(totalCount)) * 100.

        primaryID = row["primaryID"]
        raDeg = float(row["raDeg"])
        decDeg = float(row["decDeg"])
        primary_ned_id = row["primary_ned_id"]

        # SDSS CAN ONLY ACCEPT 60 QUERIES/MIN
        time.sleep(1.1)
        print "%(count)s / %(totalCount)s (%(percent)1.1f%%) NED galaxies checked for SDSS coverage" % locals()
        print "NED NAME: ", primary_ned_id

        sdss_coverage = dat.check_for_sdss_coverage.check_for_sdss_coverage(
            log=self.log,
            raDeg=raDeg,
            decDeg=decDeg
        )
        if sdss_coverage == 999:
            sdss_coverage_flag = "null"
        elif sdss_coverage == True:
            sdss_coverage_flag = 1
        elif sdss_coverage == False:
            sdss_coverage_flag = 0
        else:
            self.log.error('could not get sdss coverage')
            sys.exit(0)

        # UPDATE THE DATABASE FLAG
        sqlQuery = u"""
            update %(tableName)s set sdss_coverage = %(sdss_coverage_flag)s where primaryID = %(primaryID)s
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    self.log.info('completed the ``_update_sdss_coverage`` method')
    return None
def _query_ned_and_add_results_to_database(
        self,
        batchCount):
    """ query ned and add results to database

    **Key Arguments:**
        - ``batchCount`` -- the index number of the batch sent to NED
    """
    self.log.info(
        'starting the ``_query_ned_and_add_results_to_database`` method')

    tableName = self.dbTableName

    # QUERY NED WITH BATCH
    totalCount = len(self.theseIds)
    print "requesting metadata from NED for %(totalCount)s galaxies (batch %(batchCount)s)" % locals()
    search = namesearch(
        log=self.log,
        names=self.theseIds,
        quiet=True
    )
    results = search.get()
    print "results returned from ned -- starting to add to database"

    # CLEAN THE RETURNED DATA AND UPDATE DATABASE
    totalCount = len(results)
    count = 0
    sqlQuery = ""
    for thisDict in results:
        thisDict["tableName"] = tableName
        count += 1
        for k, v in thisDict.iteritems():
            if not v or len(v) == 0:
                thisDict[k] = "null"
            if k in ["major_diameter_arcmin", "minor_diameter_arcmin"] and (":" in v or "?" in v or "<" in v):
                thisDict[k] = v.replace(":", "").replace(
                    "?", "").replace("<", "")
            if isinstance(v, str) and '"' in v:
                thisDict[k] = v.replace('"', '\\"')

        if "Input name not" not in thisDict["input_note"] and "Same object as" not in thisDict["input_note"]:
            thisDict[
                "raDeg"] = dat.ra_sexegesimal_to_decimal.ra_sexegesimal_to_decimal(thisDict["ra"])
            thisDict[
                "decDeg"] = dat.declination_sexegesimal_to_decimal.declination_sexegesimal_to_decimal(thisDict["dec"])

            sqlQuery += u"""
                update %(tableName)s
                    set redshift_quality = "%(redshift_quality)s",
                        redshift = %(redshift)s,
                        hierarchy = "%(hierarchy)s",
                        object_type = "%(object_type)s",
                        major_diameter_arcmin = %(major_diameter_arcmin)s,
                        morphology = "%(morphology)s",
                        magnitude_filter = "%(magnitude_filter)s",
                        ned_notes = "%(ned_notes)s",
                        eb_v = %(eb-v)s,
                        raDeg = %(raDeg)s,
                        radio_morphology = "%(radio_morphology)s",
                        activity_type = "%(activity_type)s",
                        minor_diameter_arcmin = %(minor_diameter_arcmin)s,
                        decDeg = %(decDeg)s,
                        redshift_err = %(redshift_err)s,
                        in_ned = 1
                    where primary_ned_id = "%(input_name)s" and master_row = 1;\n
            """ % thisDict
        else:
            sqlQuery += u"""
                update %(tableName)s set in_ned = 0 where primary_ned_id = "%(input_name)s" and master_row = 1;\n
            """ % thisDict

    sqlQuery = sqlQuery.replace('"null"', 'null')
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )

    print "%(count)s/%(totalCount)s galaxy metadata batch entries added to database" % locals()
    if count < totalCount:
        # CURSOR UP ONE LINE AND CLEAR LINE
        sys.stdout.write("\x1b[1A\x1b[2K")

    self.log.info(
        'completed the ``_query_ned_and_add_results_to_database`` method')
    return None
def _update_transient_database(
        self):
    """ update transient database

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean _update_transient_database method
        - @review: when complete add logging
    """
    self.log.debug('starting the ``_update_transient_database`` method')

    from datetime import datetime, date, time
    now = datetime.now()
    now = now.strftime("%Y-%m-%d %H:%M:%S")

    transientTable = self.settings["database settings"][
        "transients"]["transient table"]
    transientTableClassCol = self.settings["database settings"][
        "transients"]["transient classification column"]
    transientTableIdCol = self.settings["database settings"][
        "transients"]["transient primary id column"]

    for c in self.classifications:
        objectType = c["object_classification_new"]
        transientObjectId = c["id"]

        # DELETE PREVIOUS CROSSMATCHES
        sqlQuery = u"""
            delete from tcs_cross_matches where transient_object_id = %(transientObjectId)s
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

        # INSERT NEW CROSSMATCHES
        for crossmatch in c["crossmatches"]:
            for k, v in crossmatch.iteritems():
                if v == None:
                    crossmatch[k] = "null"
            if "physical_separation_kpc" not in crossmatch.keys():
                crossmatch["physical_separation_kpc"] = "null"
            if crossmatch["sourceSubType"] and "null" not in str(crossmatch["sourceSubType"]):
                crossmatch["sourceSubType"] = '"%s"' % (
                    crossmatch["sourceSubType"],)
            else:
                crossmatch["sourceSubType"] = "null"

            sqlQuery = u"""
                insert into tcs_cross_matches (
                    transient_object_id,
                    catalogue_object_id,
                    catalogue_table_id,
                    catalogue_object_ra,
                    catalogue_object_dec,
                    original_search_radius_arcsec,
                    separation,
                    z,
                    scale,
                    distance,
                    distance_modulus,
                    date_added,
                    association_type,
                    physical_separation_kpc,
                    catalogue_object_type,
                    catalogue_object_subtype,
                    catalogue_table_name,
                    catalogue_view_name,
                    search_name,
                    major_axis_arcsec,
                    direct_distance,
                    direct_distance_scale,
                    direct_distance_modulus
                ) values (
                    %s, "%s", %s, %s, %s, %s, %s, %s, %s, %s, %s, "%s", "%s", %s, "%s", %s, "%s", "%s", "%s", %s, %s, %s, %s)
            """ % (crossmatch["transientObjectId"], crossmatch["catalogueObjectId"], crossmatch["catalogueTableId"], crossmatch["sourceRa"], crossmatch["sourceDec"], crossmatch["originalSearchRadius"], crossmatch["separation"], crossmatch["z"], crossmatch["scale"], crossmatch["distance"], crossmatch["distanceModulus"], now, crossmatch["association_type"], crossmatch["physical_separation_kpc"], crossmatch["sourceType"], crossmatch["sourceSubType"], crossmatch["catalogueTableName"], crossmatch["catalogueViewName"], crossmatch["searchName"], crossmatch["xmmajoraxis"], crossmatch["xmdirectdistance"], crossmatch["xmdirectdistancescale"], crossmatch["xmdirectdistanceModulus"])
            dms.execute_mysql_write_query(
                sqlQuery=sqlQuery,
                dbConn=self.transientsDbConn,
                log=self.log
            )

    for ob in self.transientsMetadataList:
        transId = ob["id"]
        name = ob["name"]

        sqlQuery = u"""
            select id, separation, catalogue_view_name, association_type, physical_separation_kpc, major_axis_arcsec from tcs_cross_matches where transient_object_id = %(transId)s order by separation
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

        rankScores = []
        for row in rows:
            if row["separation"] < 2. or (row["physical_separation_kpc"] != "null" and row["physical_separation_kpc"] < 20. and row["association_type"] == "SN") or (row["major_axis_arcsec"] != "null" and row["association_type"] == "SN"):
                # GOOD ASSOCIATION - RANK ON CATALOGUE WEIGHT ONLY
                rankScore = 2. - \
                    self.colMaps[row["catalogue_view_name"]][
                        "object_type_accuracy"] * 0.1
            else:
                # PENALISE THE SCORE WITH THE ANGULAR SEPARATION
                rankScore = row["separation"] + 1. - \
                    self.colMaps[row["catalogue_view_name"]][
                        "object_type_accuracy"] * 0.1
            rankScores.append(rankScore)

        rank = 0
        for rs, row in sorted(zip(rankScores, rows)):
            rank += 1
            primaryId = row["id"]
            sqlQuery = u"""
                update tcs_cross_matches set rank = %(rank)s where id = %(primaryId)s
            """ % locals()
            rows = dms.execute_mysql_read_query(
                sqlQuery=sqlQuery,
                dbConn=self.transientsDbConn,
                log=self.log
            )

        sqlQuery = u"""
            select distinct association_type from (select association_type from tcs_cross_matches where transient_object_id = %(transId)s order by rank) as alias;
        """ % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

        classification = ""
        for row in rows:
            classification += row["association_type"] + "/"
        classification = classification[:-1]

        if len(classification) == 0:
            classification = "ORPHAN"

        sqlQuery = u"""
            update %(transientTable)s set %(transientTableClassCol)s = "%(classification)s" where %(transientTableIdCol)s = %(transId)s
        """ % locals()
        print """%(name)s: %(classification)s """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.transientsDbConn,
            log=self.log
        )

    self.log.debug('completed the ``_update_transient_database`` method')
    return None
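# A worked example of the ranking above (the numbers are illustrative only):
# a catalogue view with object_type_accuracy = 8 contributes a weight term of
# 8 * 0.1 = 0.8. A "good" association (separation < 2 arcsec, or an SN within
# 20 kpc / inside the host's major axis) then scores 2.0 - 0.8 = 1.2, whereas
# a 5-arcsec match against the same catalogue scores 5.0 + 1.0 - 0.8 = 5.2;
# lower scores are ranked first.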
def _remove_previous_ned_queries(
        self,
        coordinateList):
    """ remove previous ned queries

    **Key Arguments:**
        - ``coordinateList`` -- set of coordinates to check for previous queries

    **Return:**
        - ``updatedCoordinateList`` -- coordinate list with previous queries removed
    """
    self.log.info('starting the ``_remove_previous_ned_queries`` method')

    # IMPORTS
    import htmCircle
    import math
    from dryxPython import astrotools as dat
    from datetime import datetime, date, time, timedelta

    # 1 DEGREE QUERY RADIUS
    radius = 60. * 60.
    updatedCoordinateList = []

    # FOR EACH TRANSIENT IN COORDINATE LIST
    for c in coordinateList:
        this = c.split(" ")
        raDeg = float(this[0])
        decDeg = float(this[1])

        # BUILD WHERE SECTION OF CLAUSE
        htmWhereClause = htmCircle.htmCircleRegion(
            16, raDeg, decDeg, float(radius))

        # CONVERT RA AND DEC TO CARTESIAN COORDINATES
        ra = math.radians(raDeg)
        dec = math.radians(decDeg)
        cos_dec = math.cos(dec)
        cx = math.cos(ra) * cos_dec
        cy = math.sin(ra) * cos_dec
        cz = math.sin(dec)
        cartesians = (cx, cy, cz)

        # CREATE CARTESIAN SECTION OF QUERY
        cartesianClause = 'and (cx * %.17f + cy * %.17f + cz * %.17f >= cos(%.17f))' % (
            cartesians[0], cartesians[1], cartesians[2], math.radians(radius / 3600.0))

        # CALCULATE THE OLDEST RESULTS LIMIT
        now = datetime.now()
        td = timedelta(
            days=self.settings["ned stream refresh rate in days"])
        refreshLimit = now - td
        refreshLimit = refreshLimit.strftime("%Y-%m-%d %H:%M:%S")

        # FINALLY BUILD THE FULL QUERY AND HIT DATABASE
        sqlQuery = "select * from tcs_helper_ned_query_history %(htmWhereClause)s %(cartesianClause)s and dateQueried > '%(refreshLimit)s'" % locals()
        rows = dms.execute_mysql_read_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

        # DETERMINE WHICH COORDINATES REQUIRE A NED QUERY
        match = False
        for row in rows:
            raStream = row["raDeg"]
            decStream = row["decDeg"]
            radiusStream = row["arcsecRadius"]
            dateStream = row["dateQueried"]
            angularSeparation, northSep, eastSep = dat.get_angular_separation(
                log=self.log,
                ra1=raDeg,
                dec1=decDeg,
                ra2=raStream,
                dec2=decStream
            )
            # SKIP COORDINATES STILL FULLY COVERED BY A RECENT QUERY
            if angularSeparation + self.settings["first pass ned search radius arcec"] < radiusStream:
                match = True

        if match == False:
            updatedCoordinateList.append(c)

    self.log.info('completed the ``_remove_previous_ned_queries`` method')
    return updatedCoordinateList
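# A minimal usage sketch (the coordinates are illustrative; each item in
# ``coordinateList`` is a space-separated "ra dec" string in decimal degrees,
# as implied by the ``c.split(" ")`` parsing above):
#
#     coordinateList = ["23.2342 -4.2344", "156.2340 12.0032"]
#     updatedCoordinateList = self._remove_previous_ned_queries(
#         coordinateList=coordinateList)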
def _update_tcs_helper_catalogue_views_info_with_new_views(
        self):
    """ update tcs helper catalogue views info with new views

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean _update_tcs_helper_catalogue_views_info_with_new_views method
        - @review: when complete add logging
    """
    self.log.info(
        'starting the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method')

    sqlQuery = u"""
        SELECT max(id) as thisId FROM tcs_helper_catalogue_views_info;
    """ % locals()
    thisId = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    try:
        highestId = thisId[0]["thisId"] + 1
    except:
        highestId = 1

    sqlQuery = u"""
        SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE='VIEW' AND TABLE_SCHEMA='crossmatch_catalogues' and TABLE_NAME like "tcs_view%%" and TABLE_NAME not like "%%helper%%";
    """ % locals()
    tablesInDatabase = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )

    sqlQuery = u"""
        SELECT view_name FROM tcs_helper_catalogue_views_info;
    """ % locals()
    tableList = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    tbList = []
    for tb in tableList:
        tbList.append(tb["view_name"])

    for tb in tablesInDatabase:
        if tb["TABLE_NAME"] not in tbList:
            thisViewName = tb["TABLE_NAME"]
            print "`%(thisViewName)s` added to `tcs_helper_catalogue_views_info` database table" % locals()
            sqlQuery = u"""
                INSERT INTO tcs_helper_catalogue_views_info (
                    id,
                    view_name
                ) VALUES (
                    %(highestId)s,
                    "%(thisViewName)s"
                )""" % locals()
            dms.execute_mysql_write_query(
                sqlQuery=sqlQuery,
                dbConn=self.cataloguesDbConn,
                log=self.log
            )
            highestId += 1

    self.log.info(
        'completed the ``_update_tcs_helper_catalogue_views_info_with_new_views`` method')
    return None
def _clean_up_columns(
        self):
    """clean up columns

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean _clean_up_columns method
        - @review: when complete add logging
    """
    self.log.info('starting the ``_clean_up_columns`` method')

    sqlQueries = [
        "update tcs_helper_catalogue_tables_info set old_table_name = table_name where old_table_name is null;",
        "update tcs_helper_catalogue_tables_info set version_number = 'stream' where table_name like '%%stream' and version_number is null;",
        "update tcs_helper_catalogue_tables_info set in_ned = 0 where table_name like '%%stream' and in_ned is null;",
        "update tcs_helper_catalogue_tables_info set vizier_link = 0 where table_name like '%%stream' and vizier_link is null;",
        "update tcs_helper_catalogue_views_info set old_view_name = view_name where old_view_name is null;",
    ]
    for sqlQuery in sqlQueries:
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    # VIEW OBJECT TYPES
    sqlQuery = u"""
        SELECT view_name FROM crossmatch_catalogues.tcs_helper_catalogue_views_info where legacy_view = 0 and object_type is null;
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        view_name = row["view_name"]
        object_type = view_name.replace("tcs_view_", "").split("_")[0]
        sqlQuery = u"""
            update tcs_helper_catalogue_views_info set object_type = "%(object_type)s" where view_name = "%(view_name)s"
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    # MASTER TABLE ID FOR VIEWS
    sqlQuery = u"""
        SELECT view_name FROM crossmatch_catalogues.tcs_helper_catalogue_views_info where legacy_view = 0 and table_id is null;
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        view_name = row["view_name"]
        table_name = view_name.replace("tcs_view_", "").split("_")[1:]
        table_name = ("_").join(table_name)
        table_name = "tcs_cat_%(table_name)s" % locals()
        print table_name
        sqlQuery = u"""
            update tcs_helper_catalogue_views_info set table_id = (select id from tcs_helper_catalogue_tables_info where table_name = "%(table_name)s") where view_name = "%(view_name)s"
        """ % locals()
        dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log
        )

    self.log.info('completed the ``_clean_up_columns`` method')
    return None
def _updated_row_counts_in_tcs_helper_catalogue_tables_info(
        self):
    """ update row counts in tcs catalogue tables

    **Return:**
        - None

    **Todo**
        - @review: when complete, clean _updated_row_counts_in_tcs_helper_catalogue_tables_info method
        - @review: when complete add logging
    """
    self.log.info(
        'starting the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method')

    sqlQuery = u"""
        select * from tcs_helper_catalogue_tables_info where table_name like "%%stream" or number_of_rows is null and legacy_table = 0
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        tbName = row["table_name"]
        sqlQuery = u"""
            update tcs_helper_catalogue_tables_info set number_of_rows = (select count(*) as count from %(tbName)s) where table_name = "%(tbName)s"
        """ % locals()
        count = dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            quiet=True
        )

    print "Row counts updated in `tcs_helper_catalogue_tables_info` database table"

    sqlQuery = u"""
        select * from tcs_helper_catalogue_views_info where view_name like "%%stream" or number_of_rows is null and legacy_view = 0
    """ % locals()
    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=self.cataloguesDbConn,
        log=self.log
    )
    for row in rows:
        tbName = row["view_name"]
        sqlQuery = u"""
            update tcs_helper_catalogue_views_info set number_of_rows = (select count(*) as count from %(tbName)s) where view_name = "%(tbName)s"
        """ % locals()
        count = dms.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=self.cataloguesDbConn,
            log=self.log,
            quiet=True
        )

    self.log.info(
        'completed the ``_updated_row_counts_in_tcs_helper_catalogue_tables_info`` method')
    return None
def _grab_conesearch_results_from_db(
        self):
    """ grab conesearch results from db
    """
    self.log.debug(
        'starting the ``_grab_conesearch_results_from_db`` method')

    # ACCOUNT FOR TYPE OF SEARCH
    if self.physicalSearch == False and self.transType == "SN":
        where = ""
        if self.colMaps[self.tableName]["redshiftColName"]:
            where += " and %s is null" % (
                self.colMaps[self.tableName]["redshiftColName"],)
        if self.colMaps[self.tableName]["distanceColName"]:
            where += " and %s is null" % (
                self.colMaps[self.tableName]["distanceColName"],)
        if self.colMaps[self.tableName]["semiMajorColName"]:
            where += " and %s is null" % (
                self.colMaps[self.tableName]["semiMajorColName"],)
        self.sqlQuery += where
    elif self.physicalSearch == True:
        where = ""
        if self.colMaps[self.tableName]["redshiftColName"]:
            where += " or %s is not null" % (
                self.colMaps[self.tableName]["redshiftColName"],)
        if self.colMaps[self.tableName]["distanceColName"]:
            where += " or %s is not null" % (
                self.colMaps[self.tableName]["distanceColName"],)
        if self.colMaps[self.tableName]["semiMajorColName"]:
            where += " or %s is not null" % (
                self.colMaps[self.tableName]["semiMajorColName"],)
        if len(where):
            where = " and (" + where[4:] + ")"
            self.sqlQuery += where

    self.results = []
    rows = dms.execute_mysql_read_query(
        sqlQuery=self.sqlQuery,
        dbConn=self.dbConn,
        log=self.log
    )

    if len(rows):
        # IF ONLY A COUNT(*)
        if self.queryType == 3:
            self.results = [[0.0, rows[0]['number']]]
            return "Count", self.results

        # CALCULATE THE ANGULAR SEPARATION FOR EACH ROW
        for row in rows:
            if "guide_star" in self.tableName:
                # GUIDE STAR CAT RA AND DEC ARE IN RADIANS
                ra2 = math.degrees(
                    row[self.colMaps[self.tableName]["raColName"]])
                dec2 = math.degrees(
                    row[self.colMaps[self.tableName]["decColName"]])
            else:
                ra2 = row[self.colMaps[self.tableName]["raColName"]]
                dec2 = row[self.colMaps[self.tableName]["decColName"]]
            separation, northSep, eastSep = dat.get_angular_separation(
                log=self.log,
                ra1=self.ra,
                dec1=self.dec,
                ra2=ra2,
                dec2=dec2
            )
            self.results.append([separation, row])

        # SORT BY SEPARATION
        from operator import itemgetter
        self.results = sorted(self.results, key=itemgetter(0))

        # IF NEAREST ONLY REQUESTED
        if self.nearestOnly == True:
            self.results = [self.results[0]]
    else:
        tableName = self.tableName
        self.message = "No matches from %(tableName)s." % locals()

    self.log.debug(
        'completed the ``_grab_conesearch_results_from_db`` method')
    return None
def add_HTMIds_to_mysql_tables(
        raColName,
        declColName,
        tableName,
        dbConn,
        log,
        primaryIdColumnName="primaryId"):
    """*Calculate and append HTMId info to a mysql db table containing ra and dec columns*

    **Key Arguments:**
        - ``raColName`` -- name of the table column containing the ra
        - ``declColName`` -- name of the table column containing the dec
        - ``tableName`` -- name of table to add htmid info to
        - ``dbConn`` -- database hosting the above table
        - ``log`` -- logger
        - ``primaryIdColumnName`` -- the primary id for the table

    **Return:**
        - ``None``
    """
    ## IMPORTS ##
    import math
    import sys
    import pymysql as ms
    import dryxPython.mysql as m
    from dryxPython.kws import utils as u

    # TEST TABLE EXISTS
    sqlQuery = """show tables"""
    rows = m.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        log=log
    )
    tableList = []
    for row in rows:
        tableList.extend(row.values())
    if tableName not in tableList:
        message = "The %s table does not exist in the database" % (tableName,)
        log.critical(message)
        raise IOError(message)

    # TEST COLUMNS EXIST
    cursor = dbConn.cursor(ms.cursors.DictCursor)
    sqlQuery = """SELECT * FROM %s LIMIT 1""" % (tableName,)
    cursor.execute(sqlQuery)
    rows = cursor.fetchall()
    desc = cursor.description
    existingColumns = []
    for i in range(len(desc)):
        existingColumns.append(desc[i][0])
    if (raColName not in existingColumns) or (declColName not in existingColumns):
        message = 'Please make sure you have got the names of the RA and DEC columns correct'
        log.critical(message)
        raise IOError(message)

    ## ACTION(S) ##
    htmCols = {
        'htm16ID': 'BIGINT(20)',
        'htm20ID': 'BIGINT(20)',
        'cx': 'DOUBLE',
        'cy': 'DOUBLE',
        'cz': 'DOUBLE',
    }

    # CHECK IF COLUMNS EXIST YET - IF NOT CREATE THEM
    for key in htmCols.keys():
        try:
            log.debug(
                'attempting to check and generate the HTMId columns for the %s db table' % (tableName, ))
            colExists = \
                """SELECT * FROM information_schema.COLUMNS WHERE TABLE_SCHEMA=DATABASE() AND COLUMN_NAME='%s' AND TABLE_NAME='%s'""" \
                % (key, tableName)
            colExists = m.execute_mysql_read_query(
                colExists,
                dbConn,
                log,
            )
            switch = 0
            if not colExists:
                if switch == 0:
                    print "Adding the HTMCircle columns to %(tableName)s" % locals()
                    switch = 1
                sqlQuery = 'ALTER TABLE ' + tableName + ' ADD ' + \
                    key + ' ' + htmCols[key] + ' DEFAULT NULL'
                m.execute_mysql_write_query(
                    sqlQuery,
                    dbConn,
                    log,
                )
        except Exception as e:
            log.critical('could not check and generate the HTMId columns for the %s db table - failed with this error: %s ' % (
                tableName, str(e)))
            return -1

    # COUNT ROWS WHERE HTMIDs ARE NOT SET
    sqlQuery = """SELECT count(*) as count from %(tableName)s where %(raColName)s is not null and ((htm16ID is NULL or htm16ID = 0))""" % locals()
    rowCount = m.execute_mysql_read_query(
        sqlQuery,
        dbConn,
        log,
    )
    totalCount = rowCount[0]["count"]

    # ADD HTMIDs IN BATCHES
    batchSize = 2500
    total = totalCount
    batches = int(total / batchSize)

    count = 0
    # NOW GENERATE THE HTMIds FOR THESE ROWS
    for i in range(batches + 1):
        if total == 0:
            continue
        count += batchSize
        if count > batchSize:
            # CURSOR UP ONE LINE AND CLEAR LINE
            sys.stdout.write("\x1b[1A\x1b[2K")
        if count > totalCount:
            count = totalCount
        print "%(count)s / %(totalCount)s htmIds added to %(tableName)s" % locals()

        # SELECT THE ROWS WHERE THE HTMIds ARE NOT SET
        sqlQuery = """SELECT %s, %s, %s from %s where %s is not null and ((htm16ID is NULL or htm16ID = 0)) limit %s""" % (
            primaryIdColumnName, raColName, declColName, tableName, raColName, batchSize)
        batch = m.execute_mysql_read_query(
            sqlQuery,
            dbConn,
            log,
        )

        raList = []
        decList = []
        pIdList = []
        raList[:] = [r[raColName] for r in batch]
        decList[:] = [r[declColName] for r in batch]
        pIdList[:] = [r[primaryIdColumnName] for r in batch]

        from HMpTy import htm
        mesh16 = htm.HTM(16)
        mesh20 = htm.HTM(20)
        htm16Ids = mesh16.lookup_id(raList, decList)
        htm20Ids = mesh20.lookup_id(raList, decList)

        sqlQuery = ""
        for h16, h20, pid, r, d in zip(htm16Ids, htm20Ids, pIdList, raList, decList):
            # CALCULATE CARTESIANS
            raRad = math.radians(r)
            decRad = math.radians(d)
            cos_dec = math.cos(decRad)
            cx = math.cos(raRad) * cos_dec
            cy = math.sin(raRad) * cos_dec
            cz = math.sin(decRad)
            sqlQuery += \
                """UPDATE %s SET htm16ID=%s, htm20ID=%s, cx=%s, cy=%s, cz=%s where %s = '%s';\n""" \
                % (
                    tableName,
                    h16,
                    h20,
                    cx,
                    cy,
                    cz,
                    primaryIdColumnName,
                    pid
                )
        try:
            if len(sqlQuery):
                log.debug(
                    'attempting to update the HTMIds for new objects in the %s db table' % (tableName, ))
                m.execute_mysql_write_query(
                    sqlQuery,
                    dbConn,
                    log,
                )
            else:
                log.debug(
                    'no HTMIds to add to the %s db table' % (tableName, ))
        except Exception as e:
            log.critical('could not update the HTMIds for new objects in the %s db table - failed with this error: %s ' % (
                tableName, str(e)))
            return -1

    # APPLY INDEXES IF NEEDED
    try:
        sqlQuery = u"""
            ALTER TABLE %(tableName)s ADD INDEX `idx_htm20ID` (`htm20ID` ASC);
            ALTER TABLE %(tableName)s ADD INDEX `idx_htm16ID` (`htm16ID` ASC);
        """ % locals()
        m.execute_mysql_write_query(
            sqlQuery=sqlQuery,
            dbConn=dbConn,
            log=log
        )
    except Exception, e:
        log.info('no index needed on table: %(e)s' % locals())

    return None
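# A minimal usage sketch (the table and column names here are hypothetical,
# and the connection setup is only indicative of how a caller might build one):
#
#     import pymysql
#     dbConn = pymysql.connect(
#         host="localhost", user="me", passwd="...", db="crossmatch_catalogues")
#     add_HTMIds_to_mysql_tables(
#         raColName="raDeg",
#         declColName="decDeg",
#         tableName="tcs_cat_my_catalogue",
#         dbConn=dbConn,
#         log=log,
#         primaryIdColumnName="primaryId"
#     )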
def sqlquery_to_csv_file(
        dbConn,
        log,
        sqlQuery,
        csvType="human",
        csvTitle="data exported from database",
        csvFilename="data exported from database.txt",
        returnFormat="plainText"):
    """ *Convert the results of a mysql query to a csv file (human- or machine-readable)*

    **Key Arguments:**
        - ``dbConn`` -- mysql database connection
        - ``log`` -- logger
        - ``sqlQuery`` -- the sqlQuery to convert to csv
        - ``csvType`` -- "human" or "machine"
        - ``csvTitle`` -- title for the exported data
        - ``csvFilename`` -- the filename for the csv file to be downloaded
        - ``returnFormat`` -- what format to return the data [ plainText | webpageView | webpageDownload ]

    **Return:**
        - ``returnOutput`` -- the formatted csv output

    .. todo::

        - @review: when complete, clean sqlquery_to_csv_file function
        - @review: when complete add logging
        - @review: when complete, decide whether to abstract function to another module
    """
    ################ > IMPORTS ################
    ## STANDARD LIB ##
    import io
    import csv
    import re
    from datetime import datetime
    from decimal import Decimal
    ## THIRD PARTY ##
    ## LOCAL APPLICATION ##

    log.debug('starting the ``sqlquery_to_csv_file`` function')

    rows = dms.execute_mysql_read_query(
        sqlQuery=sqlQuery,
        dbConn=dbConn,
        log=log
    )

    tableColumnNames = rows[0].keys()
    tableColumnNames.sort()
    columnWidths = []
    columnWidths[:] = [len(tableColumnNames[i])
                       for i in range(len(tableColumnNames))]

    output = io.BytesIO()

    # SETUP CSV STYLES
    if csvType == "machine":
        delimiter = ","
    elif csvType == "human":
        delimiter = "|"
    writer = csv.writer(output, dialect='excel', delimiter=delimiter,
                        quotechar='"', quoting=csv.QUOTE_MINIMAL)
    dividerWriter = csv.writer(output, dialect='excel', delimiter="+",
                               quotechar='"', quoting=csv.QUOTE_MINIMAL)

    # ADD COLUMN NAMES TO CSV
    header = []
    divider = []
    allRows = []

    # CLEAN UP THE DATA
    for row in rows:
        for c in tableColumnNames:
            if isinstance(row[c], float) or isinstance(row[c], long) or isinstance(row[c], Decimal):
                row[c] = "%0.2f" % row[c]
            elif isinstance(row[c], datetime):
                thisDate = str(row[c])[:10]
                row[c] = "%(thisDate)s" % locals()

    # SET THE COLUMN WIDTHS
    for row in rows:
        for i, c in enumerate(tableColumnNames):
            if len(str(row[c])) > columnWidths[i]:
                columnWidths[i] = len(str(row[c]))

    # FILL IN THE DATA
    for row in rows:
        thisRow = []
        # TABLE BORDER FOR HUMAN READABLE
        if csvType == "human":
            thisRow.append("")
        for i, c in enumerate(tableColumnNames):
            if csvType == "human":
                row[c] = str(str(row[c]).ljust(columnWidths[i] + 2)
                             .rjust(columnWidths[i] + 3))
            thisRow.append(row[c])
        # TABLE BORDER FOR HUMAN READABLE
        if csvType == "human":
            thisRow.append("")
        allRows.append(thisRow)

    # TABLE BORDERS FOR HUMAN READABLE
    if csvType == "human":
        header.append("")
        divider.append("")
    for i, c in enumerate(tableColumnNames):
        if csvType == "machine":
            header.append(c)
        elif csvType == "human":
            header.append(
                c.ljust(columnWidths[i] + 2).rjust(columnWidths[i] + 3))
            divider.append('-' * (columnWidths[i] + 3))
    # TABLE BORDER FOR HUMAN READABLE
    if csvType == "human":
        header.append("")
        divider.append("")

    if csvType == "machine":
        writer.writerow(header)
    elif csvType == "human":
        dividerWriter.writerow(divider)
        writer.writerow(header)
        dividerWriter.writerow(divider)

    # WRITE OUT THE DATA
    writer.writerows(allRows)
    # TABLE BORDER FOR HUMAN READABLE
    if csvType == "human":
        dividerWriter.writerow(divider)

    now = datetime.now()
    now = now.strftime("%Y-%m-%d %H:%M:%S")
    output = output.getvalue()
    if csvTitle and csvTitle.lower() != "false":
        output = """%(csvTitle)s (exported on %(now)s)\n%(output)s""" % locals()

    now = datetime.now()
    now = now.strftime("%Y%m%dt%H%M%S")
    csvFilename = csvFilename.replace(" ", "_")
    filename = """%(csvFilename)s_%(now)s.txt""" % locals()
    matchObject = re.search(
        r"^(.*)\.(.*)$",
        csvFilename,
        flags=0  # re.S
    )
    if matchObject:
        filename = matchObject.group(1)
        ext = matchObject.group(2)
        filename = """%(filename)s_%(now)s.%(ext)s""" % locals()

    output = output.strip()

    ################ >ACTION(S) ###############
    if returnFormat == "plainText":
        returnOutput = output
    elif returnFormat == "webpageView":
        webpage = dhf.htmlDocument(
            contentType="text/plain",
            content=output
        )
        returnOutput = webpage
    elif returnFormat == "webpageDownload":
        webpage = dhf.htmlDocument(
            contentType="text/csv",
            content=output,
            attachmentSaveAsName=filename
        )
        returnOutput = webpage

    log.debug('completed the ``sqlquery_to_csv_file`` function')
    return returnOutput
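# A minimal usage sketch (the query, connection and logger are hypothetical):
#
#     csvOutput = sqlquery_to_csv_file(
#         dbConn=dbConn,
#         log=log,
#         sqlQuery="select * from tcs_cross_matches limit 10",
#         csvType="human",
#         csvTitle="crossmatch sample",
#         csvFilename="crossmatch_sample.txt",
#         returnFormat="plainText"
#     )
#     print csvOutput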
if(type(value) is list):
    log.debug('key: %s is a list (value %s)' % (key, value))
    if(len(value) > 0):
        whatdataBaseDict[str(key)] = value[0]
    else:
        pass
elif(value):
    log.debug('key: %s exists and is a %s (value %s)' %
              (key, type(value), value))
    whatdataBaseDict[str(key)] = value
else:
    pass

sqlQuery = "select primaryId from " + feedTableName + \
    " where voEventUrl = '" + feed['voeURL'] + "'"
contextId = m.execute_mysql_read_query(sqlQuery, self.dbConn, log)

# ADD EXTRA COLUMNS TO THE DICTIONARY
whatdataBaseDict['dateCreated'] = now
whatdataBaseDict['dateLastModified'] = now
whatdataBaseDict['awaitingAction'] = 1
whatdataBaseDict['voPrimaryId'] = contextId[0]['primaryId']
whatdataBaseDict['rssFeedName'] = feed['rssFeedName']
whatdataBaseDict['rssFeedSource'] = feed['rssFeedSource']
whatdataBaseDict['voEventUrl'] = feed['voeURL']

whatdataBaseTableName = self._voeTablePrefix + \
    feed['rssFeedName'] + "_what_base"