def load_records(self, record_list, replace=True, source=None):
    '''
    Loads the records in the list into the DB.

    This is transactional - either all or no records will be loaded.
    Includes the DN of the sender.
    '''
    # All records in the list should be of the same type (but may not be),
    # unless they are Storage or GroupAttribute records, which can be mixed.
    try:
        record_type = type(record_list[0])
        # Check that all the records are the same type as the first (except
        # for Storage and GroupAttribute records).
        for record in record_list:
            if (type(record) != record_type and
                    type(record) not in (StorageRecord, GroupAttributeRecord)):
                raise ApelDbException(
                    "Not all records in list are of type %s." % record_type)
        if replace:
            proc = self.REPLACE_PROCEDURES[record_type]
        else:
            proc = self.INSERT_PROCEDURES[record_type]
    except KeyError:
        raise ApelDbException('No procedure found for %s; replace = %s'
                              % (record_type, replace))
    except IndexError:
        # no records to load
        return

    try:
        # prevent MySQLdb from raising a
        # 'MySQL server has gone away' exception
        self._mysql_reconnect()

        c = self.db.cursor(cursorclass=MySQLdb.cursors.DictCursor)

        for record in record_list:
            values = record.get_db_tuple(source)
            log.debug('Values: %s', values)
            if type(record) in (StorageRecord, GroupAttributeRecord):
                # These types can be found in the same record list, so we need
                # to get the right procedure for each one.
                proc = self.REPLACE_PROCEDURES[type(record)]
            c.execute(proc, values)
        self.db.commit()
    except (MySQLdb.Warning, MySQLdb.Error, KeyError), err:
        log.error("Error loading records: %s", err)
        log.error("Transaction will be rolled back.")
        self.db.rollback()
        raise ApelDbException(err)
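# Usage sketch for the loader above (assumption: `db` is an instance of this
# backend, `parsed_rows` is an iterable of {Field: Value} dicts produced
# elsewhere, and `sender_dn` is the DN of the message sender - none of these
# names exist in the backend itself). It illustrates the all-or-nothing
# behaviour: any failure rolls the whole batch back.
def _example_load_batch(db, parsed_rows, sender_dn):
    records = []
    for row in parsed_rows:
        rec = JobRecord()
        rec.set_all(row)  # populate the record from a {Field: Value} dict
        records.append(rec)
    try:
        db.load_records(records, replace=True, source=sender_dn)
    except ApelDbException, err:
        # Nothing from this batch has been committed.
        log.error('Failed to load batch: %s', err)
        raise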
def __init__(self, host, port, username, pwd, db):
    '''
    Initialise variables, and define stored procedures.
    '''
    self._db_host = host
    self._db_port = port
    self._db_username = username
    self._db_pwd = pwd
    self._db_name = db

    self._summarise_jobs_proc = "SummariseJobs"
    self._summarise_vms_proc = "SummariseVMs"
    self._copy_summaries_proc = "CopySummaries"
    self._hep_spec_hist_proc = "CreateHepSpecHistory"
    self._join_records_proc = "JoinJobRecords"
    self._local_jobs_proc = "LocalJobs"
    self._spec_lookup_proc = "SpecLookup (%s, %s, %s, %s)"
    self._spec_update_proc = "CALL SpecUpdate (%s, %s, %s, %s, %s)"
    self._processed_clean = "CALL CleanProcessedFiles(%s)"

    try:
        self.db = MySQLdb.connect(host=self._db_host, port=self._db_port,
                                  user=self._db_username, passwd=self._db_pwd,
                                  db=self._db_name)
    except MySQLdb.Error, e:
        log.error('Error connecting to database: %s', e)
        raise ApelDbException(e)
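# Usage sketch (assumption: this constructor belongs to the ApelMysqlDb
# backend referred to by the summariser further down; the host, credentials
# and database name below are placeholders, not real values).
def _example_open_db():
    db = ApelMysqlDb('localhost', 3306, 'apel', 'changeit', 'apelserver')
    db.test_connection()  # raises ApelDbException if the connection fails
    return db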
def check_duplicate_sites(self):
    '''
    Check that records from the same site are not in both the JobRecords
    table and the Summaries table.
    '''
    try:
        # prevent MySQLdb from raising a
        # 'MySQL server has gone away' exception
        self._mysql_reconnect()

        c = self.db.cursor()
        c.execute("select count(*) from JobRecords j "
                  "inner join Summaries s using (SiteID)")
        conflict = c.fetchone()[0] > 0

        if conflict:
            raise ApelDbException("Records exist in both job and summary "
                                  "tables for the same site.")
        self.db.commit()
    except MySQLdb.Error, e:
        log.error("A MySQL error occurred: %s", e)
        log.error("Any transaction will be rolled back.")
        if self.db is not None:
            self.db.rollback()
        raise
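# Hedged follow-up sketch: if check_duplicate_sites() raises, a query along
# these lines (same assumed schema as the join above) lists which SiteIDs
# appear in both tables, which is usually the first thing to look up when
# diagnosing the conflict.
FIND_DUPLICATE_SITE_IDS = ("SELECT DISTINCT SiteID FROM JobRecords "
                           "INNER JOIN Summaries USING (SiteID)")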
def _get_records(self, record_type, query_string, rec_number=1000):
    record_list = []
    try:
        # prevent MySQLdb from raising a
        # 'MySQL server has gone away' exception
        self._mysql_reconnect()

        c = self.db.cursor(cursorclass=MySQLdb.cursors.SSDictCursor)
        c.execute(query_string)
        while True:
            for row in c.fetchmany(size=rec_number):
                record = record_type()
                # row is a dictionary {Field: Value}
                record.set_all(row)
                record_list.append(record)
            if len(record_list) > 0:
                yield record_list
                record_list = []
            else:
                break
    except MySQLdb.Error, err:
        log.error('Error fetching records: %s' % str(err))
        log.error('Transaction will be rolled back.')
        self.db.rollback()
        raise ApelDbException(err)
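# Sketch of how the batching generator above is consumed (assumption: `db` is
# an instance of this backend; the SELECT statement is illustrative only, not
# a guaranteed part of the schema). Each yielded list holds at most rec_number
# records, so large tables never have to fit in memory at once.
def _example_iterate_records(db):
    total = 0
    for batch in db._get_records(JobRecord, 'SELECT * FROM JobRecords',
                                 rec_number=500):
        total += len(batch)
    log.info('Fetched %d records in batches', total)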
def unload_gap(self, table_name, start, end, ur=False):
    '''
    Unload all records from the JobRecords table whose EndTime falls
    within the provided dates (inclusive).
    '''
    record_type = self.RECORD_TYPES[table_name]

    if record_type != JobRecord:
        raise ApelDbException("Can only gap publish for JobRecords.")

    start_tuple = [int(x) for x in start.split('-')]
    end_tuple = [int(x) for x in end.split('-')]
    # get the start of the start date
    start_date = datetime.date(*start_tuple)
    start_datetime = datetime.datetime.combine(start_date, datetime.time())
    # get the end of the end date
    end_date = datetime.date(*end_tuple)
    end_datetime = datetime.datetime.combine(end_date, datetime.time())
    end_datetime += datetime.timedelta(days=1)

    log.info('Finding records with end times between:')
    log.info(start_datetime)
    log.info(end_datetime)

    query = self._get_base_query(record_type)
    query.EndTime_gt = start_datetime
    query.EndTime_le = end_datetime

    msgs, records = self._write_messages(record_type, table_name, query, ur)

    return msgs, records
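# Hedged example of gap publishing (assumption: `unloader` is the object
# providing unload_gap(), and 'JobRecords' is the table-name key it expects;
# the dates are placeholders). Start and end are 'YYYY-MM-DD' strings and both
# days are included, because a whole day is added to the end date above.
def _example_gap_publish(unloader):
    msgs, records = unloader.unload_gap('JobRecords', '2013-01-01',
                                        '2013-01-31', ur=False)
    log.info('Wrote %d records into %d messages', records, msgs)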
def load_records(self, record_list, source):
    '''
    Loads the records in the list into the DB.

    This is transactional - either all or no records will be loaded.
    Includes the DN of the sender.
    '''
    con = cx_Oracle.connect(self._db_connect_string)
    try:
        cur = con.cursor()
        # Insert the records into temporary tables.
        for record in record_list:
            proc = self.INSERT_PROCEDURES[type(record)]
            values = record.get_db_tuple(source)
            cur.callproc(proc, values)

        # Now merge the temporary tables into the actual tables.
        for merge_proc in self.MERGE_PROCEDURES.itervalues():
            cur.callproc(merge_proc)

        con.commit()
        con.close()
    except (cx_Oracle.Warning, cx_Oracle.Error), err:
        log.error("Error loading records: %s", err)
        log.error("Transaction will be rolled back.")
        con.rollback()
        con.close()
        raise ApelDbException(err)
def _write_xml(self, records):
    '''
    Write one message in the appropriate XML format to the outgoing
    message queue.

    This is currently enabled only for CAR.
    '''
    buf = StringIO.StringIO()
    if type(records[0]) == JobRecord:
        XML_HEADER = '<?xml version="1.0" ?>'
        UR_OPEN = '<urf:UsageRecords xmlns:urf="http://eu-emi.eu/namespaces/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://eu-emi.eu/namespaces/2012/11/computerecord car_v1.2.xsd">'
        UR_CLOSE = '</urf:UsageRecords>'
    # elif type(records[0]) == SummaryRecord:
    #     XML_HEADER = '<?xml version="1.0" ?>'
    #     UR_OPEN = '<aur:SummaryRecords xmlns:aur="http://eu-emi.eu/namespaces/2012/11/aggregatedcomputerecord" xmlns:urf="http://eu-emi.eu/namespaces/2012/11/computerecord" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://eu-emi.eu/namespaces/2012/11/aggregatedcomputerecord ">'
    #     UR_CLOSE = '</aur:SummaryRecords>'
    else:
        raise ApelDbException('Can only send URs for JobRecords.')

    buf.write(XML_HEADER + '\n')
    buf.write(UR_OPEN + '\n')
    buf.write('\n'.join([record.get_ur(self._withhold_dns)
                         for record in records]))
    buf.write('\n' + UR_CLOSE + '\n')
    self._msgq.add(buf.getvalue())
    buf.close()
    del buf
def _write_messages(self, record_type, table_name, query, ur):
    '''
    Write messages for all the records found in the specified table,
    according to the logic contained in the query object.
    '''
    if self._withhold_dns and record_type not in self.MAY_WITHHOLD_DNS:
        raise ApelDbException('Cannot withhold DNs for %s'
                              % record_type.__name__)
    if record_type == StorageRecord and not ur:
        raise ApelDbException('Cannot unload StorageRecords in APEL format')

    msgs = 0
    records = 0
    for batch in self._db.get_records(record_type, table_name, query=query):
        records += len(batch)
        if ur:
            self._write_xml(batch)
        else:
            self._write_apel(batch)
        msgs += 1

    return msgs, records
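# Sketch of the two output paths above (assumption: `unloader` is an instance
# of this unloading class and exposes the same private helpers used by
# unload_gap). ur=True writes CAR XML via _write_xml; ur=False writes the
# plain-text APEL format via _write_apel.
def _example_unload(unloader, ur=False):
    query = unloader._get_base_query(JobRecord)
    msgs, records = unloader._write_messages(JobRecord, 'JobRecords',
                                             query, ur)
    return msgs, records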
def test_connection(self):
    '''
    Tests the DB connection.
    '''
    try:
        con = cx_Oracle.connect(self._db_connect_string)

        log.info('Connected to: ' + self._db_log_string)
        log.info('Oracle Version: ' + con.version)
        con.close()
    except Exception, e:
        raise ApelDbException('Failed to connect to database: ' + str(e))
def load_records(self, record_list, replace=True, source=None):
    '''
    Loads the records in the list into the DB.

    This is transactional - either all or no records will be loaded.
    Includes the DN of the sender.
    '''
    # all records in the list are the same type
    try:
        record_type = type(record_list[0])
        if replace:
            proc = self.REPLACE_PROCEDURES[record_type]
        else:
            proc = self.INSERT_PROCEDURES[record_type]
    except KeyError:
        raise ApelDbException('No procedure found for %s; replace = %s'
                              % (record_type, replace))
    except IndexError:
        # no records to load
        return

    try:
        # prevent MySQLdb from raising a
        # 'MySQL server has gone away' exception
        self._mysql_reconnect()

        c = self.db.cursor(cursorclass=MySQLdb.cursors.DictCursor)

        for record in record_list:
            values = record.get_db_tuple(source)
            log.debug('Values: %s' % str(values))
            c.execute(proc, values)
        self.db.commit()
    except (MySQLdb.Warning, MySQLdb.Error, KeyError), err:
        log.error("Error loading records: %s" % str(err))
        log.error("Transaction will be rolled back.")
        self.db.rollback()
        raise ApelDbException(err)
def _mysql_reconnect(self):
    '''
    Checks if current connection is active and reconnects if necessary.
    '''
    try:
        self.db.ping()
    except MySQLdb.Error:
        try:
            self.db = MySQLdb.connect(host=self._db_host, port=self._db_port,
                                      user=self._db_username,
                                      passwd=self._db_pwd,
                                      db=self._db_name)
        except MySQLdb.Error, e:
            log.error('Error connecting to database: %s', e)
            raise ApelDbException(e)
def test_connection(self):
    '''
    Tests the DB connection - raises an ApelDbException if it fails.
    '''
    try:
        db = MySQLdb.connect(host=self._db_host, port=self._db_port,
                             user=self._db_username, passwd=self._db_pwd,
                             db=self._db_name)

        log.info('Connected to ' + self._db_host + ':' + str(self._db_port))
        log.info('Database: ' + self._db_name + '; username: '
                 + self._db_username)
        db.close()
    except MySQLdb.OperationalError, e:
        raise ApelDbException("Failed to connect to database: " + str(e))
try:
    db = ApelDb(db_backend, db_hostname, db_port, db_username,
                db_password, db_name)
    log.info('Connected.')
    # This is all the summarising logic, contained in ApelMysqlDb() and the
    # stored procedures.
    if db_type == 'cpu':
        # Make sure that records are not coming from the same site by two
        # different routes.
        # N.B. Disabled as the check doesn't scale well and isn't that useful.
        # db.check_duplicate_sites()
        db.summarise_jobs()
        db.normalise_summaries()
        db.copy_summaries()
    elif db_type == 'cloud':
        db.summarise_cloud()
    else:
        raise ApelDbException('Unknown database type: %s' % db_type)

    log.info('Summarising complete.')
    log.info(LOG_BREAK)
except ApelDbException, err:
    log.error('Error summarising: ' + str(err))
    log.error('Summarising has been cancelled.')
    log.info(LOG_BREAK)
    sys.exit(1)


if __name__ == '__main__':
    # Main method for running the summariser.
    ver = "APEL summariser %s.%s.%s" % __version__