def test_log_successful_export(self):
    """
    Verify that a successful export is written to the export history table.

    A row stamped with the epoch is logged through the exporter, looked up
    directly in "ExportHistory", and then deleted again.
    """
    # @REVIEWED
    logged = self.exporter.logSuccessfulExport(name = 'test_export',
                                               url = 'http://test_url',
                                               datetime = 0, size = 100)
    self.assertTrue(logged)

    conn = MSGDBConnector().connectDB()
    cursor = conn.cursor()
    dbUtil = MSGDBUtil()

    # Exactly one row is expected to carry the epoch timestamp.
    selectSQL = ('select * from "ExportHistory" where '
                 'timestamp = '
                 'to_timestamp(0)')
    self.assertTrue(dbUtil.executeSQL(cursor, selectSQL))
    matches = cursor.fetchall()
    self.assertEqual(len(matches), 1, "There should only be one result row.")

    # Remove the row that was inserted by this test.
    deleteSQL = ('delete from "ExportHistory" where '
                 'timestamp = to_timestamp(0)')
    self.assertTrue(dbUtil.executeSQL(cursor, deleteSQL))
    conn.commit()
class MECODBReader(object):
    """
    Read records from a database.
    """

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: True if in testing mode.
        """

        # A single connector serves both the connection and the DB-name
        # lookup so that the testing flag applies to each of them.
        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.dbUtil = MSGDBUtil()
        self.dbName = self.dbUtil.getDBName(self.connector.dictCur)

    def selectRecord(self, conn, table, keyName, keyValue):
        """
        Read a record in the database given a table name, primary key name,
        and value for the key.

        The key name and key value are interpolated into the statement
        without quoting, so a non-numeric value has to arrive already quoted.

        :param conn: DB connection.
        :param table: DB table name.
        :param keyName: DB column name for primary key.
        :param keyValue: Value to be matched.
        :returns: First row returned by the query (result of fetchone()).
        """

        print("selectRecord:")

        sql = """SELECT * FROM "%s" WHERE %s = %s""" % (
            table, keyName, keyValue)
        dcur = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(dcur, sql)
        return dcur.fetchone()

    def readingAndMeterCounts(self):
        """
        Retrieve the reading and meter counts.

        :returns: Three parallel lists: the days, the reading count divided
            by the meter count for each day, and the meter counts.
        """

        sql = """SELECT "Day", "Reading Count", "Meter Count"
                 FROM count_of_readings_and_meters_by_day"""
        dcur = self.conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(dcur, sql)
        rows = dcur.fetchall()

        dates = []
        meterCounts = []
        readingCounts = []

        for row in rows:
            dates.append(row[0])
            # The reading count is stored per meter, not as the raw total.
            readingCounts.append(row[1] / row[2])
            meterCounts.append(row[2])

        return dates, readingCounts, meterCounts
def test_log_successful_export(self):
    """
    Check that logging a successful export adds a row to the export history.

    The export is logged with the epoch as its timestamp, the matching row
    is counted in "ExportHistory", and the row is deleted afterwards.
    """
    # @REVIEWED
    wasLogged = self.exporter.logSuccessfulExport(name='test_export',
                                                  url='http://test_url',
                                                  datetime=0, size=100)
    self.assertTrue(wasLogged)

    conn = MSGDBConnector().connectDB()
    cursor = conn.cursor()
    dbUtil = MSGDBUtil()

    # Look up the row by the epoch timestamp that was just logged.
    findSQL = ('select * from "ExportHistory" where '
               'timestamp = '
               'to_timestamp(0)')
    self.assertTrue(dbUtil.executeSQL(cursor, findSQL))
    found = cursor.fetchall()
    self.assertEqual(len(found), 1, "There should only be one result row.")

    # Delete the row created by this test.
    removeSQL = ('delete from "ExportHistory" where '
                 'timestamp = to_timestamp(0)')
    self.assertTrue(dbUtil.executeSQL(cursor, removeSQL))
    conn.commit()
class MECODBDeleter(object):
    """
    Delete routines for the MECO DB.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.dbUtil = MSGDBUtil()

    def deleteRecord(self, conn, tableName, idText, idValue):
        """
        Delete the rows of a table whose ID column equals a value, then
        commit.

        The ID value is placed into the statement unquoted, which fits an
        int-based serial number.

        :param conn: DB connection used to execute and commit the delete.
        :param tableName: Name of the table to delete from.
        :param idText: DB column name for record ID.
        :param idValue: Value of the ID to be deleted.
        """

        template = """DELETE FROM "{}" where {} = {}"""
        statement = template.format(tableName, idText, idValue)

        cursor = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(cursor, statement)
        conn.commit()
class MSGWeatherDataDupeChecker(object):
    """
    Detect duplicate weather records in the "WeatherNOAA" table.

    Records are matched on the columns wban, datetime and record_type.
    """

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: Flag for testing mode. It is accepted but not used
            by this class.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def duplicateExists(self, dbCursor, wban, datetime, recordType):
        """
        Check for the existence of a duplicate record.

        :param dbCursor: DB cursor used to run the lookup.
        :param wban: Value matched against the wban column.
        :param datetime: Value matched against the datetime column.
        :param recordType: Value matched against the record_type column.
        :returns: True if a duplicate record exists, otherwise False.
        """

        tableName = "WeatherNOAA"
        lookupValues = (tableName, wban, datetime, recordType)
        sql = """SELECT wban, datetime, record_type
                 FROM \"%s\"
                 WHERE wban = '%s'
                 AND datetime = '%s'
                 AND record_type = '%s'""" % lookupValues

        self.logger.log("sql=%s" % sql, 'debug')
        self.logger.log("wban=%s, datetime=%s, record_type=%s" % (
            wban, datetime, recordType), 'debug')

        self.dbUtil.executeSQL(dbCursor, sql)
        matches = dbCursor.fetchall()

        # Any returned row means that the record is already present.
        return len(matches) > 0
class MSGWeatherDataDupeChecker(object):
    """
    Determine whether a weather record is already stored in "WeatherNOAA".

    The comparison uses the columns wban, datetime and record_type.
    """

    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: Flag for testing mode. It is accepted but not used
            by this class.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def duplicateExists(self, dbCursor, wban, datetime, recordType):
        """
        Check for the existence of a duplicate record.

        :param dbCursor: DB cursor used to run the lookup.
        :param wban: Value matched against the wban column.
        :param datetime: Value matched against the datetime column.
        :param recordType: Value matched against the record_type column.
        :returns: True if a duplicate record exists, otherwise False.
        """

        tableName = "WeatherNOAA"
        sql = """SELECT wban, datetime, record_type
                 FROM \"%s\"
                 WHERE wban = '%s'
                 AND datetime = '%s'
                 AND record_type = '%s'""" % (
            tableName, wban, datetime, recordType)

        self.logger.log("sql=%s" % sql, 'debug')
        summary = "wban=%s, datetime=%s, record_type=%s" % (
            wban, datetime, recordType)
        self.logger.log(summary, 'debug')

        self.dbUtil.executeSQL(dbCursor, sql)
        found = dbCursor.fetchall()

        # One or more rows means that the record already exists.
        return len(found) > 0
def insertData(files, table, cols, testing = False):
    """
    Insert aggregated data generated by this script into a database table.

    Each file is read as comma-delimited text whose first line is a header.
    Every following row becomes one INSERT statement. Values are stripped
    and single-quoted, with embedded single quotes doubled so that the
    statement stays valid; a value that is exactly NULL is inserted as SQL
    NULL. Work is committed every 10000 rows and after each file.

    :param files: A list of the filenames to be processed.
    :param table: The name of the table in the DB.
    :param cols: A list of the columns (as strings) in the table.
    :param testing: Accepted for interface compatibility; it is not used by
        this function.
    """

    def sqlValue(item):
        # Render one CSV field as an SQL literal.
        text = item.strip()
        if text == 'NULL':
            return 'NULL'
        return "'" + text.replace("'", "''") + "'"

    connector = MSGDBConnector()
    conn = connector.connectDB()
    dbUtil = MSGDBUtil()
    cursor = conn.cursor()

    columnList = ','.join(cols)
    cnt = 0

    for filename in files:

        with open(filename, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter = ',')
            # Skip the header line; an empty file has nothing to skip.
            next(reader, None)
            for row in reader:
                sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                    table, columnList, ','.join(sqlValue(item) for item in row))

                dbUtil.executeSQL(cursor, sql)
                cnt += 1
                # Commit in batches to bound the size of a transaction.
                if cnt % 10000 == 0:
                    conn.commit()

        conn.commit()
        cnt = 0
def insertData(files, table, cols):
    """
    Insert aggregated data generated by this script into a database table.

    Each file is read as comma-delimited data whose first line is a header.
    Every following row is printed and turned into one INSERT statement.
    Values are stripped and single-quoted, with embedded single quotes
    doubled so that the statement stays valid; a value that is exactly NULL
    is inserted as SQL NULL. Work is committed every 10000 rows and after
    each file.

    :param files: A list of the filenames to be processed.
    :param table: The name of the table in the DB.
    :param cols: A list of the columns (as strings) in the table.
    """

    def sqlValue(item):
        # Render one CSV field as an SQL literal.
        text = item.strip()
        if text == 'NULL':
            return 'NULL'
        return "'" + text.replace("'", "''") + "'"

    connector = MSGDBConnector()
    conn = connector.connectDB()
    dbUtil = MSGDBUtil()
    cursor = conn.cursor()

    columnList = ','.join(cols)
    cnt = 0

    for filename in files:

        with open(filename, 'rb') as csvfile:
            myReader = csv.reader(csvfile, delimiter=',')
            # Skip the header line; an empty file has nothing to skip.
            next(myReader, None)
            for row in myReader:
                print(row)
                sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                    table, columnList, ','.join(sqlValue(item) for item in row))

                dbUtil.executeSQL(cursor, sql)
                cnt += 1
                # Commit in batches to bound the size of a transaction.
                if cnt % 10000 == 0:
                    conn.commit()

        conn.commit()
        cnt = 0
class MECODBDeleter(object):
    """
    Delete routines for the MECO DB.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.dbUtil = MSGDBUtil()

    def deleteRecord(self, conn, tableName, idText, idValue):
        """
        Delete the rows of a table whose ID column equals a value, then
        commit.

        The ID value is placed into the statement unquoted, which fits an
        int-based serial number.

        :param conn: DB connection used to execute and commit the delete.
        :param tableName: Name of the table to delete from.
        :param idText: DB column name for record ID.
        :param idValue: Value of the ID to be deleted.
        """

        template = """delete from "%s" where %s = %s"""
        statement = template % (tableName, idText, idValue)

        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(cursor, statement)
        conn.commit()
def countOfDBExports(self, since = None):
    """
    Count the export history rows that are newer than a given time.

    :param since: datetime indicating last export datetime. When it is
        omitted, 1900-01-01 00:00 is used so that every export is counted.
    :return: Int of count of exports.
    :raises AssertionError: if the query fails or does not yield exactly one
        row.
    """
    timeFormat = '%Y-%m-%d %H:%M'

    if not since:
        # Parse with the same hour:minute format that is used for output.
        since = datetime.datetime.strptime('1900-01-01 00:00', timeFormat)

    sinceText = since.strftime(timeFormat)
    self.logger.log(sinceText, 'DEBUG')

    sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
          '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
        sinceText)

    conn = MSGDBConnector().connectDB()
    cursor = conn.cursor()
    dbUtil = MSGDBUtil()

    # A failed query leaves no rows, which the check below then reports.
    rows = []
    if dbUtil.executeSQL(cursor, sql, exitOnFail = False):
        rows = cursor.fetchall()
    assert len(rows) == 1, 'Invalid return value.'
    return rows[0][0]
def countOfDBExports(self, since=None):
    """
    Count the export history rows that are newer than a given time.

    :param since: datetime indicating last export datetime. When it is
        omitted, 1900-01-01 00:00 is used so that every export is counted.
    :return: Int of count of exports.
    :raises AssertionError: if the query fails or does not yield exactly one
        row.
    """
    timeFormat = '%Y-%m-%d %H:%M'

    if not since:
        # Parse with the same hour:minute format that is used for output.
        since = datetime.datetime.strptime('1900-01-01 00:00', timeFormat)

    sinceText = since.strftime(timeFormat)
    self.logger.log(sinceText, 'DEBUG')

    sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
          '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
        sinceText)

    conn = MSGDBConnector().connectDB()
    cursor = conn.cursor()
    dbUtil = MSGDBUtil()

    # A failed query leaves no rows, which the check below then reports.
    rows = []
    if dbUtil.executeSQL(cursor, sql, exitOnFail=False):
        rows = cursor.fetchall()
    assert len(rows) == 1, 'Invalid return value.'
    return rows[0][0]
def logSuccessfulExport(self, name = '', url = '', datetime = 0, size = 0):
    """
    Record a successful export in the export history table.

    The logged items are the filename, the URL, the timestamp and the file
    size. The table name is read from the 'export_history_table' option of
    the 'Export' configuration section.

    :param name: String
    :param url: String
    :param datetime: Timestamp of the export. Zero is stored as the epoch
        through to_timestamp(0); any other value is formatted into an SQL
        timestamp literal.
    :param size: Int
    :return: Result of executing the INSERT; true when no errors occurred.
    """

    columnNames = ['name', 'url', 'timestamp', 'size']

    # Zero stands for the epoch; everything else is used verbatim.
    if datetime == 0:
        timestampSQL = 'to_timestamp(0)'
    else:
        timestampSQL = "timestamp '{}'".format(datetime)

    historyTable = self.configer.configOptionValue('Export',
                                                   'export_history_table')
    quotedName = "'" + name + "'"
    quotedURL = "'" + url + "'"

    sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
        historyTable, ','.join(columnNames), quotedName, quotedURL,
        timestampSQL, size)

    conn = MSGDBConnector().connectDB()
    cursor = conn.cursor()
    dbUtil = MSGDBUtil()
    result = dbUtil.executeSQL(cursor, sql, exitOnFail = False)
    conn.commit()
    return result
def logSuccessfulExport(self, name='', url='', datetime=0, size=0):
    """
    Record a successful export in the export history table.

    The logged items are the filename, the URL, the timestamp and the file
    size. The table name is read from the 'export_history_table' option of
    the 'Export' configuration section.

    :param name: String
    :param url: String
    :param datetime: Timestamp of the export. Zero is stored as the epoch
        through to_timestamp(0); any other value is formatted into an SQL
        timestamp literal.
    :param size: Int
    :return: Result of executing the INSERT; true when no errors occurred.
    """

    columnNames = ['name', 'url', 'timestamp', 'size']

    # Zero stands for the epoch; everything else is used verbatim.
    if datetime == 0:
        timestampSQL = 'to_timestamp(0)'
    else:
        timestampSQL = "timestamp '{}'".format(datetime)

    historyTable = self.configer.configOptionValue('Export',
                                                   'export_history_table')
    quotedName = "'" + name + "'"
    quotedURL = "'" + url + "'"

    sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
        historyTable, ','.join(columnNames), quotedName, quotedURL,
        timestampSQL, size)

    conn = MSGDBConnector().connectDB()
    cursor = conn.cursor()
    dbUtil = MSGDBUtil()
    result = dbUtil.executeSQL(cursor, sql, exitOnFail=False)
    conn.commit()
    return result
class MSGNOAAWeatherDataInserter(object):
    """
    Performs weather data insertion to a database.
    """

    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: True if testing mode is being used. It is accepted
            but not used by this class.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.dbUtil = MSGDBUtil()
        self.dupeChecker = MSGWeatherDataDupeChecker()

    def insertDataDict(self, conn, tableName, listOfDataDicts, commit=False):
        """
        Insert a list of column-to-value dictionaries into a table.

        Every dictionary gains a 'created' entry before it is inserted, so
        the caller's dictionaries are modified. Rows that the dupe checker
        reports as already present are dropped. A failed insert prints the
        offending row and exits the process with status -1.

        :param conn: A database connection.
        :param tableName: Name of the DB table to be inserted to.
        :param listOfDataDicts: List of dictionaries of columns and values
            to be inserted to the DB. Each needs the keys 'wban', 'datetime'
            and 'record_type'.
        :param commit: (optional) A flag indicating that DB transactions
            will be committed.
        :returns: Set of datetimes processed.
        """

        cur = conn.cursor()
        processedDateTimes = set()

        for row in listOfDataDicts:

            # Add a creation timestamp using the SQL function.
            row['created'] = 'NOW()'

            cols = []
            vals = []

            for col in row.keys():
                # Prepare the columns and values for insertion via SQL.
                cols.append(col)
                if row[col] != 'NULL':
                    # Surround each value with single quotes...
                    vals.append("'%s'" % row[col])
                else:
                    # Except for NULL values.
                    vals.append("%s" % row[col])

            sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                tableName, ','.join(cols), ','.join(vals))

            if self.dupeChecker.duplicateExists(cur, row['wban'],
                                                row['datetime'],
                                                row['record_type']):
                self.logger.log("Dupe found, dropping dupe.", 'info')
            else:
                processedDateTimes.add(
                    dt.datetime.strptime(row['datetime'], "%Y-%m-%d %H:%M"))
                if self.dbUtil.executeSQL(cur, sql,
                                          exitOnFail=False) is False:
                    # An error occurred.
                    for col in sorted(row.keys()):
                        print("%s: %s" % (col, row[col]))
                    sys.exit(-1)

        if commit:
            try:
                conn.commit()
            except Exception:
                # A failed commit is only logged; interpreter exit requests
                # and keyboard interrupts are allowed to propagate.
                self.logger.log("ERROR: Commit failed.", 'debug')

        return processedDateTimes
class MECODupeChecker(object):
    """
    Check for duplicate data in the database.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mecoConfig = MSGConfiger()
        # Set by readingBranchDupeExists() when a duplicate is found; zero
        # means that no reading has been matched.
        self.currentReadingID = 0
        self.dbUtil = MSGDBUtil()

    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Indexable collection, such as a result row from a query.
        :return: Last element in the collection, or None if it is empty.
        """

        if len(rows) == 0:
            return None
        return rows[-1]

    def eventBranchDupeExists(self, conn, meterName, eventTime):
        """
        Determine if an event branch duplicate exists for a given meter
        name and event time.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param eventTime: Timestamp of event.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Event".event_time,
                        "MeterData".meter_data_id,
                        "EventData".event_data_id
                 FROM ( ( "MeterData" JOIN "EventData" ON (
                        ( "MeterData".meter_data_id = "EventData"
                        .meter_data_id ) ) )
                    JOIN "Event" ON ( ( "EventData".event_data_id = "Event"
                    .event_data_id ) ) )
                 WHERE "MeterData".meter_name = '%s'
                 AND "Event".event_time = '%s' """ % (meterName, eventTime)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0

    def registerBranchDupeExists(self, conn, meterName, readTime,
                                 registerNumber, DEBUG = False):
        """
        Determine if a register branch duplicate exists for a given meter
        name, read time, number tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param readTime: Read time in RegisterRead table.
        :param registerNumber: Corresponds to DB column "number".
        :param DEBUG: Accepted but not used by this method.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "public"."MeterData".meter_name,
                        "public"."RegisterRead".read_time,
                        "public"."Register"."number"
                 FROM "public"."MeterData"
                 INNER JOIN "public"."RegisterData" ON "public"
                 ."MeterData".meter_data_id = "public"
                 ."RegisterData".meter_data_id
                 INNER JOIN "public"."RegisterRead" ON
                 "public"."RegisterData"
                 .register_data_id = "public"
                 ."RegisterRead".register_data_id
                 INNER JOIN "public"."Tier" ON "public"."RegisterRead"
                 .register_read_id = "public"."Tier"
                 .register_read_id
                 INNER JOIN "public"."Register" ON
                 "public"."Tier".tier_id = "public"."Register".tier_id
                 WHERE "public"."MeterData".meter_name = '%s'
                 AND "public"."RegisterRead".read_time = '%s'
                 AND "public"."Register".number = '%s'
                 """ % (meterName, readTime, registerNumber)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0

    def readingBranchDupeExists(self, conn, meterName, endTime, channel = None,
                                DEBUG = False):
        """
        Determine if a reading branch duplicate exists.

        Duplicate cases:

        1. Tuple (meterID, endTime) exists in the database.
        @DEPRECATED in favor of (2), full meterName-endTime-channel query.

        2. Tuple (meterID, endTime, channel) exists in the database.

        When a duplicate is found, the last column of the matching row is
        saved as the current reading ID. That column is the reading ID only
        for the channel query (2).

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param endTime: End time in Interval table.
        :param channel: Required parameter that was previously optional. An
            optional channel is now deprecated.
        :param DEBUG: When True, the method name is printed.
        :return: True if tuple exists, False if not.
        :raises AssertionError: if more than one matching row is found.
        """

        dbCursor = conn.cursor()

        if DEBUG:
            print("readingBranchDupeExists():")

        if channel is not None:
            sql = """SELECT "Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id,
                            "Reading".channel,
                            "Reading".reading_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id = "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time = '%s' and meter_name = '%s' and
                     channel = '%s'""" % (
                endTime, meterName, channel)

        else:  # deprecated query
            sql = """SELECT "Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id = "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     WHERE "Interval".end_time = '%s' and meter_name =
                     '%s'""" % (
                endTime, meterName)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            assert len(
                rows) < 2, "Dupes should be less than 2, found %s: %s." % (
                len(rows), rows)

            self.currentReadingID = self.getLastElement(rows[0])
            self.logger.log('Reading ID = %s.' % self.currentReadingID,
                            'silent')

            self.logger.log(
                "Duplicate found for meter %s, end time %s, channel %s." % (
                    meterName, endTime, channel), 'silent')
            return True

        self.logger.log(
            "Found no rows for meter %s, end time %s, channel %s." % (
                meterName, endTime, channel), 'silent')
        return False

    def readingValuesAreInTheDatabase(self, conn, readingDataDict):
        """
        Verify that the values stored for the current reading ID match the
        given values.

        Values are from the columns:
            1. channel
            2. raw_value
            3. uom
            4. value

        :param conn: Database connection.
        :param readingDataDict: Dictionary containing reading values under
            the keys 'Channel', 'RawValue', 'UOM' and 'Value'.
        :return: True if the existing values are the same, otherwise return
            False. False is also returned when no reading ID has been set.
        :raises AssertionError: if the current reading ID does not match
            exactly one row.
        """

        # Without a current reading ID there is nothing to compare, so the
        # lookup is not needed.
        if self.currentReadingID == 0:
            return False

        dbCursor = conn.cursor()

        sql = """SELECT "Reading".reading_id,
                        "Reading".channel,
                        "Reading".raw_value,
                        "Reading".uom,
                        "Reading"."value"
                 FROM "Reading"
                 WHERE "Reading".reading_id = %s""" % (self.currentReadingID)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        # assert len(rows) == 1 or len(rows) == 0
        assert len(
            rows) == 1, "Didn't find a matching reading for reading ID %s." %\
                        self.currentReadingID

        # Reached only when assertions are disabled.
        if len(rows) != 1:
            return False

        self.logger.log("Found %s existing matches." % len(rows), 'silent')

        existing = rows[0]
        allEqual = True

        if int(readingDataDict['Channel']) == int(existing[1]):
            print("channel equal,")
        else:
            self.logger.log("channel not equal: %s,%s,%s" % (
                int(readingDataDict['Channel']), int(existing[1]),
                readingDataDict['Channel'] == existing[1]), 'debug')
            allEqual = False

        if int(readingDataDict['RawValue']) == int(existing[2]):
            print("raw value equal,")
        else:
            self.logger.log("rawvalue not equal: %s,%s,%s" % (
                int(readingDataDict['RawValue']), int(existing[2]),
                readingDataDict['RawValue'] == existing[2]), 'debug')
            allEqual = False

        if readingDataDict['UOM'] == existing[3]:
            print("uom equal,")
        else:
            self.logger.log("uom not equal: %s,%s,%s" % (
                readingDataDict['UOM'], existing[3],
                readingDataDict['UOM'] == existing[3]), 'debug')
            allEqual = False

        # Values are compared as floats within a fixed tolerance.
        if self.approximatelyEqual(float(readingDataDict['Value']),
                                   float(existing[4]), 0.001):
            self.logger.log("value equal", 'silent')
        else:
            self.logger.log("value not equal: %s,%s,%s" % (
                float(readingDataDict['Value']), float(existing[4]),
                readingDataDict['Value'] == existing[4]), 'debug')
            allEqual = False

        return allEqual

    def approximatelyEqual(self, a, b, tolerance):
        """
        Compare two numbers within a tolerance.

        :param a: First number.
        :param b: Second number.
        :param tolerance: Exclusive upper bound for the absolute difference.
        :return: True if the absolute difference is below the tolerance.
        """

        return abs(a - b) < tolerance
] lineCnt = 0 with open(filename) as tsv: for line in csv.reader(tsv, delimiter="\t"): if lineCnt != 0: data = line[0:66] for i in range(0, 66): if len(data[i]) == 0: data[i] = "NULL" else: data[i] = "'" + data[i] + "'" sql = """INSERT INTO "MeterRecords" (%s) VALUES (%s)""" % (",".join(cols), ",".join(data)) dbUtil.executeSQL(cur, sql) lineCnt += 1 conn.commit() msg = "Processed %s lines.\n" % lineCnt sys.stderr.write(msg) msgBody += msg notifier.sendNotificationEmail(msgBody)
class NewDataAggregator(object):
    """
    Aggregate new data for a set of predefined data types (self.rawTypes)
    and send a notification about the result.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = MSGLogger(__name__, 'DEBUG')
        self.aggregator = MSGDataAggregator()
        self.notifier = MSGNotifier()
        self.rawTypes = ['weather', 'egauge', 'circuit', 'irradiance']
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()

    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: string indicating the type of the
            notification. It is stored in the event history.
        :returns: datetime of last report date, or None if there is none.
        :raises Exception: if the query does not succeed.
        """

        cursor = self.cursor
        sql = """SELECT MAX("notificationTime") FROM "{}" WHERE
        "notificationType" = '{}'""".format(NOTIFICATION_HISTORY_TABLE,
                                            notificationType)

        if not self.dbUtil.executeSQL(cursor, sql):
            raise Exception('Exception during getting last report date.')

        latest = cursor.fetchall()[0][0]
        # An empty maximum is reported as None.
        return latest if latest else None

    def sendNewDataNotification(self, result = None, testing = False):
        """
        Send a notification reporting on new data being available since the
        last time new data was reported, then save the notification time.

        :param result: list of dicts containing aggregation results as
            provided by MSGDataAggregator::aggregateNewData.
        :param testing: Use testing mode when True.
        """

        self.logger.log('result {}'.format(result), 'debug')

        lastReportDate = self.lastReportDate(NOTIFICATION_HISTORY_TYPE)
        if not lastReportDate:
            lastReportDate = "never"

        if result:
            msgBody = '\nNew data has been aggregated in {}.'.format(
                self.connector.dbName)
            msgBody += '\n\n'
            # Each result dict contributes its first key and first value.
            for item in result:
                msgBody += 'The new data count for type {} is {} readings' \
                           '.\n'.format(list(item.keys())[0],
                                        list(item.values())[0])
            msgBody += '\n\n'
            msgBody += 'The last report date was %s.' % lastReportDate
            msgBody += '\n\n'
        else:
            msgBody = '\nNew data has NOT been aggregated in {}. No result ' \
                      'was obtained. This is an error that should be ' \
                      'investigated.'.format(self.connector.dbName)

        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.saveNotificationTime()

    def saveNotificationTime(self):
        """
        Save a notification event to the notification history.

        :raises Exception: if the insert does not succeed. The commit is
            issued before the result is checked.
        """

        sql = """INSERT INTO "{}" ("notificationType", "notificationTime")
        VALUES ('{}', NOW())""".format(NOTIFICATION_HISTORY_TABLE,
                                       NOTIFICATION_HISTORY_TYPE)
        saved = self.dbUtil.executeSQL(self.cursor, sql)
        self.conn.commit()
        if not saved:
            raise Exception('Exception while saving the notification time.')

    def aggregateNewData(self):
        """
        Aggregate new data for every raw type.

        :return: list of dicts obtained from
            MSGDataAggregator::aggregateNewData.
        """

        result = [self.aggregator.aggregateNewData(rawType) for rawType in
                  self.rawTypes]
        self.logger.log('result {}'.format(result))
        return result
if lineCnt != 0: # Skip header. data = line[0:len(cols)] # Overshoot columns to get the last column. for i in range(0, len(cols)): if len(data[i]) == 0: data[i] = 'NULL' else: # Escape single quotes with double single quotes in # PostgreSQL. data[i] = data[i].replace("'", "\'\'") data[i] = "'" + data[i] + "'" sql = """INSERT INTO "MeterLocationHistory" (%s) VALUES (%s)""" % ( ','.join(cols), ','.join(data)) logger.log("SQL: %s" % sql, 'debug') success = dbUtil.executeSQL(cur, sql) if not success: anyFailure = True lineCnt += 1 conn.commit() msg = ("Processed %s lines.\n" % lineCnt) sys.stderr.write(msg) msgBody += msg if not anyFailure: msg = "Finished inserting Meter Location History records.\n" sys.stderr.write(msg) msgBody += msg
# Insert every data line of the tab-separated file into "MeterRecords".
lineCnt = 0

with open(filename) as tsv:
    for line in csv.reader(tsv, delimiter="\t"):
        # The first line is the header; it is counted but not inserted.
        if lineCnt != 0:
            # Only the first 66 fields are inserted.
            data = line[0:66]

            for i in range(0, 66):
                if len(data[i]) == 0:
                    # Empty fields become SQL NULL.
                    data[i] = 'NULL'
                else:
                    # Double embedded single quotes so that the value stays
                    # a valid SQL string literal.
                    data[i] = "'" + data[i].replace("'", "''") + "'"

            sql = """INSERT INTO "MeterRecords" (%s) VALUES (%s)""" % (
                ','.join(cols), ','.join(data))
            dbUtil.executeSQL(cur, sql)

        lineCnt += 1

conn.commit()

# The reported count includes the header line.
msg = ("Processed %s lines.\n" % lineCnt)
sys.stderr.write(msg)
msgBody += msg

notifier.sendNotificationEmail(msgBody)
class MSGDataVerifier(object):
    """
    Perform verification procedures related to data integrity.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG')
        self.cursor = MSGDBConnector().connectDB().cursor()
        self.dbUtil = MSGDBUtil()

    def mecoReadingsDupeCount(self):
        """
        Generate counts of MECO dupe readings.

        The count is taken month by month for every year in YEARS and each
        monthly count is logged.

        :returns: Sum of the monthly dupe counts.
        """

        dupes = 0

        for y in YEARS:
            for m in range(1, 13):
                # The range runs from the first to the last day of a month.
                start = '%d-%02d-%02d' % (y, m, 1)
                end = '%d-%02d-%02d' % (y, m, calendar.monthrange(y, m)[1])

                cnt = self.__mecoReadingsDupeCount(start, end)
                self.logger.log('start: %s, dupe cnt: %s' % (start, cnt),
                                'INFO')
                dupes += cnt

        return dupes

    def __mecoReadingsDupeCount(self, startDate, endDate):
        """
        Count the (meter name, end time, channel) groups that occur more
        than once between two dates.

        :param startDate: Lower bound of the interval end time.
        :param endDate: Upper bound of the interval end time.
        :returns: DB row count.
        """

        sql = """SELECT "Interval".end_time,
                        "MeterData".meter_name,
                        "Reading".channel
                 FROM "MeterData"
                 INNER JOIN "IntervalReadData" ON "MeterData"
                 .meter_data_id = "IntervalReadData".meter_data_id
                 INNER JOIN "Interval" ON "IntervalReadData"
                 .interval_read_data_id = "Interval".interval_read_data_id
                 INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                 .interval_id
                 WHERE "Interval".end_time BETWEEN '%s' and '%s'
                 GROUP BY "MeterData".meter_name,
                 "Interval".end_time,
                 "Reading".channel
                 HAVING (COUNT(*) > 1)""" % (startDate, endDate)

        self.dbUtil.executeSQL(self.cursor, sql)
        return len(self.cursor.fetchall())

    def egaugeAggregationCount(self):
        """
        There should not be more than 96 15-min interval endpoints within a
        single calendar day for a given sub ID.

        Not implemented.

        :return:
        """

        pass
class MSGEgaugeNewDataChecker(object):
    """
    Provide notification of newly loaded MSG eGauge data.

    This uses notification type MSG_EGAUGE_SERVICE.
    """

    def __init__(self):
        """
        Constructor.
        """

        print(__name__)
        self.logger = SEKLogger(__name__)
        self.connector = MSGDBConnector()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()

    def newDataCount(self):
        """
        Measure the amount of new data that is present since the last time
        new data was reported.

        :returns: Count of rows newer than the last report date, zero when
            the count is empty, or None if the query does not succeed.
        """

        cursor = self.connector.conn.cursor()
        tableName = 'EgaugeEnergyAutoload'

        lastTime = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if lastTime is None:
            # With no earlier report, count everything.
            lastTime = '1900-01-01'

        sql = """SELECT COUNT(*) FROM "%s" WHERE datetime > '%s'""" % (
            tableName, lastTime)

        if not self.dbUtil.executeSQL(cursor, sql):
            # @todo Raise an exception.
            return None

        count = cursor.fetchall()[0][0]
        return count if count else 0

    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: A string indicating the type of the
            notification. It is stored in the event history.
        :returns: datetime of last report date, or None if there is none or
            the query does not succeed.
        """

        cursor = self.connector.conn.cursor()
        sql = """SELECT MAX("notificationTime") FROM "%s" WHERE
        "notificationType" = '%s'""" % (NOTIFICATION_HISTORY_TABLE,
                                        notificationType)

        if not self.dbUtil.executeSQL(cursor, sql):
            # @todo Raise an exception.
            return None

        latest = cursor.fetchall()[0][0]
        return latest if latest else None

    def saveNotificationTime(self):
        """
        Save the notification event to the notification history.

        The commit is issued before the result of the insert is checked; a
        failed insert is only logged.
        """

        cursor = self.connector.conn.cursor()
        sql = """INSERT INTO "%s" ("notificationType", "notificationTime")
        VALUES ('MSG_EGAUGE_SERVICE', NOW())""" % NOTIFICATION_HISTORY_TABLE
        saved = self.dbUtil.executeSQL(cursor, sql)
        self.connector.conn.commit()
        if not saved:
            # @todo Raise an exception.
            self.logger.log(
                'An error occurred while saving the notification time.')

    def sendNewDataNotification(self, testing=False):
        """
        Send a notification reporting on new data being available since the
        last time new data was reported, then save the notification time.

        :param testing: Use testing mode when True.
        """

        lastReportDate = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if not lastReportDate:
            lastReportDate = "never"

        # The pieces are evaluated in the order in which they appear in the
        # message.
        parts = []
        parts.append('\nNew MSG eGauge data has been loaded to %s.' % self
                     .connector.dbName)
        parts.append('\n\n')
        parts.append(
            'The new data count is %s readings.' % self.newDataCount())
        parts.append('\n\n')
        parts.append('The last report date was %s.' % lastReportDate)
        parts.append('\n\n')
        msgBody = ''.join(parts)

        self.notifier.sendNotificationEmail(msgBody, testing=testing)
        self.saveNotificationTime()
class MSGWeatherDataUtil(object):
    """
    Utility methods for working with weather data.
    """

    def __init__(self):
        """
        Constructor.

        A database connection is not maintained here to keep this class
        lightweight. The file list and date list are filled from the remote
        server during construction.
        """

        self.logger = SEKLogger(__name__, DEBUG)
        self.configer = MSGConfiger()
        self.url = self.configer.configOptionValue('Weather Data',
                                                   'weather_data_url')
        self.pattern = self.configer.configOptionValue('Weather Data',
                                                       'weather_data_pattern')
        self.fileList = []
        self.dateList = []  # List of dates corresponding weather data files.
        self.fillFileListAndDateList()
        self.dbUtil = MSGDBUtil()

    def fillFileListAndDateList(self):
        """
        Fill the file list and the date list from the weather files that
        are listed on the remote server.

        Nothing is returned; the results are appended to self.fileList and
        self.dateList.
        """

        response = urllib2.urlopen(self.url).read()

        self.logger.log('Filling file list:', DEBUG)
        for filename in re.findall(self.pattern, response):
            # Only examine first match group in the filename match.
            self.logger.log('filename {}'.format(filename[0]), DEBUG)
            self.fileList.append(filename[0])
            self.dateList.append(self.datePart(filename[0]))

    def datePart(self, filename=None, datetime=None):
        """
        Return the date part of a NOAA weather data filename.

        At most one of the two arguments may be given.

        :param filename: String of the filename. The texts "QCLCD" and
            ".zip" are removed from it.
        :param datetime: datetime object. It is formatted as YYYY-MM-DD.
        :returns: String of the date part of the given parameter, or None
            if neither argument is given.
        :raises AssertionError: if both arguments are given.
        """

        assert filename is None or datetime is None, \
            "One argument is allowed."

        if filename:
            newName = filename.replace("QCLCD", '')
            newName = newName.replace(".zip", '')
            return newName
        if datetime:
            return datetime.strftime('%Y-%m-%d')

    def getLastDateLoaded(self, cursor):
        """
        Return the last date of loaded weather data.

        :param cursor: DB cursor.
        :returns: Last date, taken from the datetime column of the newest
            row.
        """

        sql = """select wban, datetime, record_type from "%s"
                 ORDER BY datetime desc limit 1""" % WEATHER_DATA_TABLE

        self.dbUtil.executeSQL(cursor, sql)
        # NOTE(review): presumably fetchone() gives None for an empty table,
        # which would make the indexing below fail - confirm.
        row = cursor.fetchone()
        # self.logger.log('Date last loaded = %s' % row[1], 'info')
        return row[1]

    def getKeepList(self, fileList, cursor):
        """
        The Keep List is the list of filenames of files containing data that
        are *within* the month of the last loaded date or are beyond the
        last loaded date.

        :param fileList: A list of files containing weather data.
        :param cursor: DB cursor.
        :returns: List of weather data filenames to process, in the order
            in which they appear in fileList.
        """

        keepList = []
        lastDate = None

        for name in fileList:
            self.logger.log('Examining date %s.' % name)

            # The list date should be the last day of the month.
            # It is the date that is compared against the last retrieved date.
            listDate = dt.datetime.strptime(self.datePart(filename=name),
                                            "%Y%m")
            lastDay = calendar.monthrange(listDate.year, listDate.month)[1]
            listDate = dt.datetime.strptime(
                '%s-%s-%s' % (listDate.year, listDate.month, lastDay),
                "%Y-%m-%d")
            self.logger.log('List date = %s.' % listDate)

            # The last loaded date does not change while the list is
            # examined, so it is fetched once and reused.
            if lastDate is None:
                lastDate = self.getLastDateLoaded(cursor)
            self.logger.log('last date = %s' % lastDate)

            if lastDate <= listDate:
                keepList.append(name)

        return keepList
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in
    a respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and
    flexibility advantages over alternative approaches such as creating a
    view. It may be rolled into an internal function at future time if that
    proves to be beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)
        aggregateNewData(dataType = dataType)
    """

    def __init__(self, exitOnError=True, commitOnEveryInsert=False,
                 testing=False):
        """
        Constructor.

        :param exitOnError: passed to the SQL executor as exitOnFail; when
        True a failed insert also raises an Exception.
        :param commitOnEveryInsert: if True, commit after each insert instead
        of only once per batch.
        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.configer = MSGConfiger()
        # The testing flag used to be ignored here, so the documented testing
        # mode silently used the default connection.
        # NOTE(review): assumes MSGDBConnector takes the testing flag as its
        # first positional argument -- confirm.
        self.conn = MSGDBConnector(testing).connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        # Next interval crossing (minute of the hour) keyed by subkey.
        self.nextMinuteCrossing = {}
        # Next interval crossing for data types that have no subkey.
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert

        section = 'Aggregation'
        tableList = [
            'irradiance', 'agg_irradiance', 'weather', 'agg_weather',
            'circuit', 'agg_circuit', 'egauge', 'agg_egauge'
        ]
        # dataParams[datatype] gives (agg type, time column, subkey column).
        self.dataParams = {
            'weather': ('agg_weather', 'timestamp', ''),
            'egauge': ('agg_egauge', 'datetime', 'egauge_id'),
            'circuit': ('agg_circuit', 'timestamp', 'circuit'),
            'irradiance': ('agg_irradiance', 'timestamp', 'sensor_id')
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {
            t: self.configer.configOptionValue(section, '{}_table'.format(t))
            for t in tableList
        }

        for t in self.tables.keys():
            self.logger.log('t:{}'.format(t), 'DEBUG')
            try:
                self.columns[t] = self.dbUtil.columnsString(
                    self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log(
                    'Ignoring missing table: Error is {}.'.format(error),
                    'error')

    def existingIntervals(self, aggDataType='', timeColumnName=''):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals in ascending order.
        """

        sql = """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
            timeColumnName, self.tables[aggDataType], timeColumnName)
        return [x[0] for x in self.rows(sql)]

    def unaggregatedIntervalCount(self, dataType='', aggDataType='',
                                  timeColumnName='', idColumnName=''):
        """
        Return count of unaggregated intervals for a given data type.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: int
        """

        return len(
            self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName,
                                       idColumnName))

    def lastAggregationEndpoint(self, aggDataType='', timeColumnName=''):
        """
        Last aggregation endpoint for a given datatype.

        :param aggDataType: string
        :param timeColumnName: string
        :return: The last (greatest) existing interval.
        :raises IndexError: if no intervals exist yet.
        """

        return self.existingIntervals(aggDataType=aggDataType,
                                      timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self, dataType='', aggDataType='',
                              timeColumnName='', idColumnName=''):
        """
        Sorted (ascending) endpoints for unaggregated intervals since the
        last aggregation endpoint for a given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        # Looked up once: each lookup reads every existing interval.
        lastEndpoint = self.lastAggregationEndpoint(aggDataType,
                                                    timeColumnName)
        self.logger.log('last agg endpoint: {}'.format(lastEndpoint))

        if idColumnName != '':
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = ('SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON '
                   '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE '
                   '"{1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} '
                   'ASC, {3} ASC').format(self.tables[dataType],
                                          self.tables[aggDataType],
                                          timeColumnName, idColumnName,
                                          lastEndpoint)
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = ('SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}='
                   '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' '
                   'ORDER BY {2} ASC').format(self.tables[dataType],
                                              self.tables[aggDataType],
                                              timeColumnName, lastEndpoint)

        # The id column value, when selected, is not used here.
        #
        # @todo Exclude last endpoint if it is equal to the last
        # aggregation endpoint.
        #
        # The minute position filtering may be including the last
        # endpoint incorrectly because there are readings occurring
        # within the same minute as the final endpoint, e.g. 23:45:04,
        # 23:45:08, etc.
        #
        # This is not a problem with eGauge data due to reading intervals
        # being every minute and zero seconds.
        timestamps = [row[0] for row in self.rows(sql)]

        # Keep readings that fall on an interval boundary, truncated to
        # (year, month, day, hour, minute).
        minuteTuples = [
            t.timetuple()[0:5] for t in timestamps
            if t.timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0
        ]

        # groupby collapses consecutive readings within the same minute.
        return [
            datetime(k[0], k[1], k[2], k[3], k[4], 0)
            for k, _ in groupby(minuteTuples)
        ]

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Intervals are at 0, 15, 30, 45 min
        (60 wraps to 0).
        The interval size is determined by MECO source data.

        The stored next crossing for the subkey (or for the no-subkey case)
        is advanced when a crossing is detected.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        :raises Exception: if minute is not defined.
        """

        if not minute and minute != 0:
            raise Exception('Minute not defined.')

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            nextCrossing = self.nextMinuteCrossing[subkey]
            if minute >= nextCrossing and minute <= last \
                    and nextCrossing != first:
                nextCrossing += intervalSize
                if nextCrossing >= last:
                    nextCrossing = first
                self.nextMinuteCrossing[subkey] = nextCrossing
                self.logger.log('minute crossed at #1.', 'debug')
                return True
            elif nextCrossing == first and minute >= first \
                    and minute <= intervalSize:
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log('minute crossed at #2.', 'debug')
                return True
            return False
        else:
            nextCrossing = self.nextMinuteCrossingWithoutSubkeys
            if minute >= nextCrossing and minute <= last \
                    and nextCrossing != first:
                nextCrossing += intervalSize
                if nextCrossing >= last:
                    nextCrossing = first
                self.nextMinuteCrossingWithoutSubkeys = nextCrossing
                self.logger.log('minute crossed at #3.', 'debug')
                return True
            elif nextCrossing == first and minute >= first \
                    and minute <= intervalSize:
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log('minute crossed at #4.', 'debug')
                return True
            return False

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log('sql: {}'.format(sql), 'debug')
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self, dataType='', orderBy=None, timestampCol='',
                startDate='', endDate=''):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list of column names; empty entries are dropped.
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderColumns = [col for col in orderBy if col]

        return self.rows(
            """SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND '{}' """
            """ORDER BY {}""".format(self.columns[dataType],
                                     self.tables[dataType], timestampCol,
                                     startDate, endDate,
                                     ','.join(orderColumns)))

    def subkeys(self, dataType='', timestampCol='', subkeyCol='',
                startDate='', endDate=''):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        sql = ("""SELECT DISTINCT({}) FROM "{}" WHERE {} BETWEEN '{}' """
               """AND '{}' ORDER BY {}""").format(subkeyCol,
                                                   self.tables[dataType],
                                                   timestampCol, startDate,
                                                   endDate, subkeyCol)
        return [sk[0] for sk in self.rows(sql)]

    def insertAggregatedData(self, agg=None):
        """
        Insert aggregated data to the table for its aggregation type and
        commit.

        Each item of agg.data is either a list of column values (one insert)
        or a dict of {subkey: list of column values} (one insert per key).

        :param agg: MSGAggregatedData
        :return: None
        :raises Exception: if columns or data are missing, a row has an
        unsupported type, or an insert fails while exitOnError is set.
        """

        if not agg.columns:
            raise Exception('agg columns not defined.')
        if not agg.data:
            raise Exception('agg data not defined.')

        self.logger.log('agg data: {}'.format(agg.data))
        self.logger.log('agg data type: {}'.format(type(agg.data)))

        def sqlLiteral(val):
            """
            SQL text for a single value.

            The NULL marker is passed through, strings are stripped and
            quoted, datetimes are quoted in ISO format.
            """
            if val == 'NULL':
                return val
            if isinstance(val, str):
                return "'" + val.strip() + "'"
            if isinstance(val, datetime):
                return "'" + val.isoformat() + "'"
            # Numbers of any kind (int, float, and e.g. Decimal, which
            # previously failed on string concatenation).
            return '%s' % (val, )

        def insertRow(rowValues):
            """
            Perform insert of one row of values to the database.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ','.join(agg.columns),
                ','.join([sqlLiteral(v) for v in rowValues]))
            self.logger.log('sql: {}'.format(sql), 'debug')
            success = self.dbUtil.executeSQL(self.cursor, sql,
                                             exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception('Failure during aggregated data insert.')

        for row in agg.data:
            if isinstance(row, dict):
                for key in row.keys():
                    insertRow(row[key])
            elif isinstance(row, list):
                insertRow(row)
            else:
                self.logger.log('row = {}'.format(row), 'error')
                raise Exception('Row type not matched.')

        # End for row.
        self.conn.commit()

    def intervalAverages(self, sums, cnts, timestamp, timestampIndex,
                         subkeyIndex=None, subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        Columns with a zero count are reported as the string 'NULL'.

        :param sums: list, or dict of lists keyed by subkey
        :param cnts: list, or dict of lists keyed by subkey
        :param timestamp: datetime placed at timestampIndex
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}, or as a
        plain list when subkey is None.
        """

        def averages(sumList, cntList, keyIndex, keyValue):
            """
            Average one list of sums; keyIndex of None disables the subkey
            column.
            """
            result = []
            reportedAgg = False
            for index, s in enumerate(sumList):
                if index == timestampIndex:
                    result.append(timestamp)
                elif keyIndex is not None and index == keyIndex:
                    result.append(keyValue)
                elif cntList[index] != 0:
                    if not reportedAgg:
                        self.logger.log(
                            'Aggregating {} rows of data.'.format(
                                cntList[index]), 'debug')
                        reportedAgg = True
                    result.append(s / cntList[index])
                else:
                    result.append('NULL')
            return result

        if subkey is not None:
            self.logger.log('key: {}'.format(subkey), 'debug')
            return {
                subkey: averages(sums[subkey], cnts[subkey], subkeyIndex,
                                 subkey)
            }

        # The subkey index is ignored when there is no subkey.
        return averages(sums, cnts, None, None)

    def dataParameters(self, dataType=''):
        """
        Parameters for a given data type.

        :param dataType: string
        :return: (aggType, timeColName, subkeyColName), or None (after
        logging) when the data type is unknown or malformed.
        """

        params = self.dataParams.get(dataType)
        if params is None or len(params) != 3:
            self.logger.log('Unmatched data type {}.'.format(dataType))
            return None
        return params

    def aggregateAllData(self, dataType=''):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.

        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName,
                                                  dataType=dataType):
            self.logger.log('start, end: {}, {}'.format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime('%Y-%m-%d %H:%M:%S'),
                endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log('aggData row: {}'.format(row))

    def aggregateNewData(self, dataType=''):
        """
        Convenience method for aggregating new data.

        :param dataType: String in the list of raw data types.
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        (end, start) = \
            self.lastUnaggregatedAndAggregatedEndpoints(dataType)[dataType]

        self.logger.log(
            'datatype: {}; start, end: {}, {}; end type: {}'.format(
                dataType, start, end, type(end)), 'critical')

        if end is None:
            # No available unaggregated endpoints results in an empty list
            # for type egauge. The reason this does not work for other types
            # is because the other types of fractional minute readings and
            # the fractional minute readings are not being handled completely
            # but this method is still capable of working without problem.
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        nextStart = self.incrementEndpoint(start)
        if nextStart >= end:
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=nextStart.strftime('%Y-%m-%d %H:%M:%S'),
            endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log('aggData row: {}'.format(row))
        self.logger.log('{} rows aggregated for {}.'.format(
            len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.

        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint + 15 minutes.
        """

        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=''):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.

        The last unaggregated endpoint is None when there is nothing
        unaggregated.

        :param dataType: string
        :return: dict with tuple.
        """

        self.logger.log('datatype {}'.format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log('subkey colname {}'.format(subkeyColName))
        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType,
            aggDataType=aggType,
            timeColumnName=timeColName,
            idColumnName=subkeyColName)
        self.logger.log('unagg endpoints: {}'.format(unAggregatedEndpoints))

        lastUnaggregated = None
        if unAggregatedEndpoints != []:
            lastUnaggregated = unAggregatedEndpoints[-1]

        return {
            dataType: (lastUnaggregated,
                       self.lastAggregationEndpoint(
                           aggDataType=aggType, timeColumnName=timeColName))
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.

        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """

        result = {}
        for dataType in self.dataParams:
            # Each call yields a single-entry dict keyed by the data type.
            result.update(
                self.lastUnaggregatedAndAggregatedEndpoints(dataType))
        return result

    def monthStartsAndEnds(self, timeColumnName='', dataType=''):
        """
        Return first date and last date for the given **raw** data type for
        each month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log('datatype {}'.format(dataType), 'debug')
        (start, end) = self.rows(
            """SELECT MIN({}), MAX({}) FROM \"{}\"""".format(
                timeColumnName, timeColumnName, self.tables[dataType]))[0]
        self.logger.log('start {}'.format(start))
        self.logger.log('end {}'.format(end))

        # End time needs transforming in split dates to extend the end of the
        # day to 23:59:59.
        startEndDatesTransform = []
        for rangeStart, rangeEnd in self.timeUtil.splitDates(start, end):
            (year, month, day) = rangeEnd.timetuple()[0:3]
            startEndDatesTransform.append(
                (rangeStart,
                 self.incrementEndpoint(
                     datetime(year, month, day, 23, 59, 59))))
        return startEndDatesTransform

    def aggregatedData(self, dataType='', aggregationType='',
                       timeColumnName='', subkeyColumnName='', startDate='',
                       endDate=''):
        """
        ***********************************************************************
        Provide aggregated data.
        ***********************************************************************

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
        # The column string is split once instead of on every index lookup.
        columnList = self.columns[dataType].split(',')
        columnCount = len(columnList)
        ci = columnList.index

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(dataType=dataType,
                                     timestampCol=timeColumnName,
                                     subkeyCol=subkeyColumnName,
                                     startDate=startDate,
                                     endDate=endDate)
        self.logger.log('subkeys: {}'.format(mySubkeys), 'debug')

        def initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.

            Without subkeys, fresh zeroed lists are returned. With subkeys,
            either every subkey is initialized (subkey is None) or only the
            given subkey is reset.

            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            :returns: (sums, cnts)
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                return ([0] * columnCount, [0] * columnCount)

            # Tested against None so that a falsy subkey value, such as 0,
            # still resets only its own sums.
            if subkey is None:
                for k in mySubkeys:
                    if k not in sums:
                        sums[k] = []
                        cnts[k] = []
                    sums[k].extend([0] * columnCount)
                    cnts[k].extend([0] * columnCount)
            else:
                sums[subkey] = [0] * columnCount
                cnts[subkey] = [0] * columnCount

            return (sums, cnts)

        (sums, cnts) = initSumAndCount()

        def firstCrossing(minute):
            """
            The first interval crossing at or after the given minute.
            """
            if minute <= 15:
                return 15
            if minute <= 30:
                return 30
            if minute <= 45:
                return 45
            if minute <= 59:
                return 0
            raise Exception('Unable to determine next minute crossing')

        def initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.

            :returns None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log('subkeys to check: {}'.format(subkeysToCheck),
                            'debug')

            if mySubkeys:
                for row in self.rawData(
                        dataType=dataType,
                        orderBy=[timeColumnName, subkeyColumnName],
                        timestampCol=timeColumnName,
                        startDate=startDate,
                        endDate=endDate):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck == []:
                        break

                    rowSubkey = row[ci(subkeyColumnName)]
                    if rowSubkey in subkeysToCheck:
                        subkeysToCheck.remove(rowSubkey)
                        minute = row[ci(
                            timeColumnName)].timetuple()[MINUTE_POSITION]
                        self.nextMinuteCrossing[rowSubkey] = firstCrossing(
                            minute)
                        self.logger.log(
                            'next min crossing for {} = {}'.format(
                                rowSubkey,
                                self.nextMinuteCrossing[rowSubkey]), 'debug')
            else:
                # Non-subkey case e.g. weather data.
                # @todo Optimize by querying only the first row.
                for row in self.rawData(dataType=dataType,
                                        orderBy=[timeColumnName],
                                        timestampCol=timeColumnName,
                                        startDate=startDate,
                                        endDate=endDate):
                    minute = row[ci(
                        timeColumnName)].timetuple()[MINUTE_POSITION]
                    self.nextMinuteCrossingWithoutSubkeys = firstCrossing(
                        minute)
                    self.logger.log(
                        'next min crossing = {}'.format(
                            self.nextMinuteCrossingWithoutSubkeys), 'debug')
                    # Only the first row is needed.
                    break

        initIntervalCrossings()

        for row in self.rawData(dataType=dataType,
                                orderBy=[timeColumnName, subkeyColumnName],
                                timestampCol=timeColumnName,
                                startDate=startDate,
                                endDate=endDate):

            if mySubkeys:
                subkeyIndex = ci(subkeyColumnName)
                rowSubkey = row[subkeyIndex]

                for col in columnList:
                    index = ci(col)
                    if self.mathUtil.isNumber(row[index]) \
                            and index != subkeyIndex:
                        sums[rowSubkey][index] += row[index]
                        cnts[rowSubkey][index] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute, subkey=rowSubkey):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data
                    # including the current data for the current subkey.
                    self.logger.log('key: {}'.format(rowSubkey), 'debug')
                    aggData += [
                        self.intervalAverages(sums, cnts,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName),
                                              subkeyIndex, rowSubkey)
                    ]
                    self.logger.log('minute crossed {}'.format(minuteCrossed),
                                    'DEBUG')

                    # Init current sum and cnt for subkey that has a
                    # completed interval.
                    (sums, cnts) = initSumAndCount(subkey=rowSubkey,
                                                   sums=sums,
                                                   cnts=cnts)
            else:
                for col in columnList:
                    index = ci(col)
                    if self.mathUtil.isNumber(row[index]):
                        sums[index] += row[index]
                        cnts[index] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [
                        self.intervalAverages(sums, cnts,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName))
                    ]
                    (sums, cnts) = initSumAndCount(subkey=None,
                                                   sums=sums,
                                                   cnts=cnts)

        self.logger.log('aggdata = {}'.format(aggData), 'debug')
        return MSGAggregatedData(aggregationType=aggregationType,
                                 columns=columnList,
                                 data=aggData)
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = MSGLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        """
        Print the given parameter; nothing else is done.

        :param param: value to be printed.
        """
        print("CallableClass.__call__(%s)" % param)

    def insertData(self, conn, tableName, columnsAndValues, fKeyVal = None,
                   withoutCommit = 0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicated that the insert
        will not be immediately committed
        :returns: A database cursor.
        :raises KeyError: if a mapped column is missing from
        columnsAndValues for a table other than Register or Reading.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print("----------" + tableName + "----------")
            print(columnDict)
            print(columnsAndValues)

        for col in columnDict.keys():
            dbColName = columnDict[col]

            if col == '_pkey':
                # Use default as the value for the primary key so that the
                # private key is obtained from the predefined sequence.
                value = 'DEFAULT'
            elif col == '_fkey':
                # For the foreign key, set the value from the given parameter.
                value = fKeyVal
            elif tableName == 'Register' or tableName == 'Reading':
                # The Register and Reading tables need to handle NULL
                # values as a special case. Only a missing column counts as
                # NULL; the lookup is kept apart from the debug printing so
                # that a printing failure cannot turn a real value into NULL.
                try:
                    value = columnsAndValues[col]
                except KeyError:
                    value = 'NULL'
            else:
                # For all other cases, simply pass the value.
                value = columnsAndValues[col]

            if VISUALIZE_DATA:
                # DB col name followed by the data source value.
                print("%s %s" % (dbColName, value))

            dbColsAndVals[dbColName] = value

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        # DEFAULT, NULL and NOW() need to appear without quotes.
        unquoted = {'DEFAULT', 'NULL', 'NOW()'}

        cols = []
        vals = []
        for col, value in dbColsAndVals.items():
            cols.append(col)
            if value in unquoted:
                vals.append(value)
            else:
                # Surround value with single quotes.
                # NOTE(review): the value is interpolated as-is, so a value
                # containing a single quote breaks the statement. Escaping or
                # a parameterized query should be considered if the executor
                # supports it.
                vals.append("'%s'" % value)

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
            tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except Exception:
                # Best effort: a failed commit is logged and the cursor is
                # still returned.
                self.logger.log("ERROR: Commit failed.", 'debug')

        return cur
class MSGNotifier(object):
    """
    Provides notification service functionality for MSG data processing.

    Email settings are stored in the local configuration.

    Usage:

    from msg_notifier import MSGNotifier
    self.notifier = MSGNotifier()

    Public API:

    sendNotificationEmail(msgBody, testing = False):
        Send msgBody as a notification to the mailing list defined in the
        config file.

    sendMailWithAttachments(msgBody, files = None, testing = False)
        Send msgBody with files attached as a notification to the mailing
        list defined in the config file.

    lastReportDate(noticeType):
        The last date where a notification of the given type was reported.

    recordNotificationEvent(noticeType):
        Record an event in the notification history.
    """

    def __init__(self):
        """
        Constructor.

        A deprecation warning is emitted and a DB connection is opened.
        """

        warnings.simplefilter('default')
        warnings.warn("This module is deprecated in favor of SEKNotifier.",
                      DeprecationWarning)

        self.config = MSGConfiger()
        self.logger = SEKLogger(__name__, 'info')
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.noticeTable = 'NotificationHistory'
        self.notificationHeader = "This is a message from the Hawaii Smart " \
                                  "Energy Project MSG Project notification " \
                                  "system.\n\n"

        self.noReplyNotice = '\n\nThis email account is not monitored. No ' \
                             'replies will originate from this ' \
                             'account.\n\nYou are receiving this message ' \
                             'because you are on the recipient list for ' \
                             'notifications for the Hawaii Smart Energy ' \
                             'Project.'

    def sendNotificationEmail(self, msgBody, testing = False):
        """
        This method is an alternative to the multipart method in
        sendMailWithAttachments.

        The notification header and the no-reply notice are added around the
        given body.

        :param msgBody: The body of the message to be sent.
        :param testing: True if running in testing mode.
        :returns: True for success, False for an error.
        """

        errorOccurred = False
        user = self.config.configOptionValue('Notifications',
                                             'email_username')
        password = self.config.configOptionValue('Notifications',
                                                 'email_password')
        fromaddr = self.config.configOptionValue('Notifications',
                                                 'email_from_address')

        if testing:
            toaddr = self.config.configOptionValue(
                'Notifications', 'testing_email_recipients')
        else:
            toaddr = self.config.configOptionValue('Notifications',
                                                   'email_recipients')

        server = smtplib.SMTP(
            self.config.configOptionValue('Notifications',
                                          'smtp_server_and_port'))

        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log(
                "Exception during SMTP STARTTLS: {}".format(detail), 'ERROR')

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP login: %s" % detail,
                            'ERROR')

        senddate = datetime.now().strftime('%Y-%m-%d')
        subject = "HISEP Notification"

        msgHeader = "Date: {}\r\nFrom: {}\r\nTo: {}\r\nSubject: {" \
                    "}\r\nX-Mailer: My-Mail\r\n\r\n".format(senddate, fromaddr,
                                                            toaddr, subject)

        msgBody = self.notificationHeader + msgBody
        msgBody += self.noReplyNotice

        try:
            self.logger.log("Send email notification.", 'INFO')
            server.sendmail(fromaddr, toaddr, msgHeader + msgBody)
            server.quit()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log(
                "Exception during SMTP sendmail: {}".format(detail), 'ERROR')

        return not errorOccurred

    def sendMailWithAttachments(self, msgBody, files = None, testing = False):
        """
        Send email along with attachments.

        :param msgBody: String containing the body of the messsage to send.
        :param files: List of file paths. None is treated as no attachments.
        :param testing: True if running in testing mode.
        :returns: True if no exceptions are raised, False otherwise.
        """

        if files is None:
            files = []

        sys.stderr.write("Sending multipart email.\n")
        if testing:
            self.logger.log("Notification testing mode is ON.\n", 'info')

        errorOccurred = False
        assert type(files) == list

        user = self.config.configOptionValue('Notifications',
                                             'email_username')
        password = self.config.configOptionValue('Notifications',
                                                 'email_password')

        if testing:
            send_to = self.config.configOptionValue(
                'Notifications', 'testing_email_recipients')
        else:
            send_to = self.config.configOptionValue('Notifications',
                                                    'email_recipients')

        send_from = self.config.configOptionValue('Notifications',
                                                  'email_from_address')

        msg = MIMEMultipart()
        msg['From'] = send_from
        msg['To'] = send_to
        msg['Date'] = formatdate(localtime = True)
        msg['Subject'] = "HISEP Notification"

        msg.attach(MIMEText(msgBody))

        for f in files:
            sys.stderr.write("Attaching file %s.\n" % f)
            part = MIMEBase('application', "octet-stream")
            # The attachment is closed as soon as it has been read.
            with open(f, "rb") as attachment:
                part.set_payload(attachment.read())
            Encoders.encode_base64(part)
            part.add_header(
                'Content-Disposition',
                'attachment; filename="%s"' % os.path.basename(f))
            msg.attach(part)

        server = smtplib.SMTP(
            self.config.configOptionValue('Notifications',
                                          'smtp_server_and_port'))
        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP STARTTLS: %s" % detail,
                            'ERROR')

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP login: %s" % detail,
                            'ERROR')

        self.logger.log("Send email notification.", 'INFO')

        try:
            server.sendmail(send_from, send_to, msg.as_string())
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP sendmail: %s" % detail,
                            'ERROR')

        server.quit()

        if not errorOccurred:
            self.logger.log('No exceptions occurred.\n', 'info')

        # The error flag itself used to be returned, which reported success
        # as False, contrary to the documented return value and to
        # sendNotificationEmail.
        return not errorOccurred

    def recordNotificationEvent(self, noticeType = None):
        """
        Save a notification event to the notification history.

        :param noticeType: <enum 'MSGNotificationHistoryTypes'>
        :returns: Boolean. False when the notice type is missing or not a
        member of MSGNotificationHistoryTypes.
        :raises Exception: if the insert fails.
        """

        if not noticeType:
            return False
        if not noticeType in MSGNotificationHistoryTypes:
            return False

        cursor = self.cursor
        sql = ("""INSERT INTO "{}" ("notificationType", "notificationTime") """
               """VALUES ('{}', NOW())""").format(self.noticeTable,
                                                   noticeType.name)
        success = self.dbUtil.executeSQL(cursor, sql)
        self.conn.commit()
        if not success:
            raise Exception('Exception while saving the notification time.')
        return success

    def lastReportDate(self, noticeType = None):
        """
        Get the last time a notification was reported for the given
        noticeType.

        :param noticeType: Member of MSGNotificationHistoryTypes indicating
        the type of the notification. Its name is stored in the event
        history.
        :returns: datetime of last report date, or None if there is none.
        :raises Exception: if the notice type is invalid or the query fails.
        """

        if not noticeType or (not noticeType in MSGNotificationHistoryTypes):
            raise Exception('Invalid notice type.')

        cursor = self.cursor

        sql = 'SELECT MAX("notificationTime") FROM "{}" WHERE ' \
              '"notificationType" = \'{}\''.format(self.noticeTable,
                                                   noticeType.name)

        success = self.dbUtil.executeSQL(cursor, sql)
        if not success:
            raise Exception('Exception during getting last report date.')

        rows = cursor.fetchall()
        lastReported = rows[0][0]
        if not lastReported:
            return None
        return lastReported
class MSGEgaugeNewDataChecker(object):
    """
    Report newly loaded MSG eGauge data by email.

    All notification history rows written and read by this class use the
    notification type MSG_EGAUGE_SERVICE.
    """

    def __init__(self):
        """
        Constructor.

        Creates the logger, DB connector, DB utility, notifier and configer
        used by the other methods.
        """

        print(__name__)
        self.logger = SEKLogger(__name__)
        self.connector = MSGDBConnector()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()

    def newDataCount(self):
        """
        Count the eGauge readings that are newer than the last report date.

        When no report has been recorded yet, every reading after
        1900-01-01 is counted.

        :returns: The count (0 when the query yields a falsy value), or None
        when the query could not be executed.
        """

        cursor = self.connector.conn.cursor()
        tableName = 'EgaugeEnergyAutoload'

        lastTime = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if lastTime is None:
            lastTime = '1900-01-01'

        sql = """SELECT COUNT(*) FROM "%s" WHERE datetime > '%s'""" % (
            tableName, lastTime)

        if not self.dbUtil.executeSQL(cursor, sql):
            # @todo Raise an exception.
            return None

        count = cursor.fetchall()[0][0]
        return count or 0

    def lastReportDate(self, notificationType):
        """
        Look up the most recent notification time of the given type.

        :param notificationType: A string indicating the type of the
        notification. It is stored in the event history.
        :returns: The latest notification time, or None when there is none or
        the query could not be executed.
        """

        cursor = self.connector.conn.cursor()
        sql = """SELECT MAX("notificationTime") FROM "%s" WHERE "notificationType" = '%s'""" % (
            NOTIFICATION_HISTORY_TABLE, notificationType)

        if not self.dbUtil.executeSQL(cursor, sql):
            # @todo Raise an exception.
            return None

        latest = cursor.fetchall()[0][0]
        return latest or None

    def saveNotificationTime(self):
        """
        Record the current time as an MSG_EGAUGE_SERVICE notification event.

        The transaction is committed whether or not the insert succeeded; a
        failed insert is only logged.
        """

        cursor = self.connector.conn.cursor()
        sql = """INSERT INTO "%s" ("notificationType", "notificationTime") VALUES ('MSG_EGAUGE_SERVICE', NOW())""" % NOTIFICATION_HISTORY_TABLE
        inserted = self.dbUtil.executeSQL(cursor, sql)
        self.connector.conn.commit()

        if inserted:
            return

        # @todo Raise an exception.
        self.logger.log(
            'An error occurred while saving the notification time.')

    def sendNewDataNotification(self, testing = False):
        """
        Email a summary of the data loaded since the last report and then
        record the time of this notification.

        :param testing: Use testing mode when True.
        """

        lastReportDate = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if not lastReportDate:
            lastReportDate = "never"

        # The paragraphs are evaluated in order, so the DB name is read before
        # the new data count is queried.
        paragraphs = [
            '\nNew MSG eGauge data has been loaded to %s.'
            % self.connector.dbName,
            'The new data count is %s readings.' % self.newDataCount(),
            'The last report date was %s.' % lastReportDate,
        ]
        msgBody = ''.join(paragraph + '\n\n' for paragraph in paragraphs)

        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.saveNotificationTime()
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in a
    respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)
        aggregateNewData(dataType = dataType)
    """

    def __init__(self, exitOnError=True, commitOnEveryInsert=False, testing=False):
        """
        Constructor.

        :param exitOnError: passed to the DB utility as exitOnFail for
        inserts; when True a failed insert also raises an Exception.
        :param commitOnEveryInsert: commit after each inserted row instead of
        only once per batch.
        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, "info")
        self.configer = MSGConfiger()
        # Forward the testing flag; it was previously accepted but ignored,
        # so the documented testing mode never took effect.
        self.conn = MSGDBConnector(testing=testing).connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        # Next interval boundary (minute of the hour) per subkey, and for the
        # data types that have no subkeys.
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert

        section = "Aggregation"
        tableList = [
            "irradiance",
            "agg_irradiance",
            "weather",
            "agg_weather",
            "circuit",
            "agg_circuit",
            "egauge",
            "agg_egauge",
        ]
        # dataParams[datatype] gives (aggregated type, time column name,
        # subkey column name); the subkey column is empty when there is none.
        self.dataParams = {
            "weather": ("agg_weather", "timestamp", ""),
            "egauge": ("agg_egauge", "datetime", "egauge_id"),
            "circuit": ("agg_circuit", "timestamp", "circuit"),
            "irradiance": ("agg_irradiance", "timestamp", "sensor_id"),
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {t: self.configer.configOptionValue(section, "{}_table".format(t)) for t in tableList}

        for t in self.tables:
            self.logger.log("t:{}".format(t), "DEBUG")
            try:
                self.columns[t] = self.dbUtil.columnsString(self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log("Ignoring missing table: Error is {}.".format(error), "error")

    def existingIntervals(self, aggDataType="", timeColumnName=""):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals, in ascending time order.
        """

        return [
            x[0]
            for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName
                )
            )
        ]

    def unaggregatedIntervalCount(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Return count of unaggregated intervals for a given data type.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: int
        """

        return len(self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName, idColumnName))

    def lastAggregationEndpoint(self, aggDataType="", timeColumnName=""):
        """
        Last aggregation endpoint for a given datatype.

        :param aggDataType: string
        :param timeColumnName: string
        :return: The last element of existingIntervals().
        :raises IndexError: when no aggregated intervals exist yet.
        """

        return self.existingIntervals(aggDataType=aggDataType, timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Sorted (ascending) endpoints for unaggregated intervals since the last
        aggregation endpoint for a given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string, empty for data types without subkeys.
        :return: list of datetimes.
        """

        # Query the last endpoint once; it is a full-table fetch and used to
        # be repeated for the log message and again for the SQL.
        lastEndpoint = self.lastAggregationEndpoint(aggDataType, timeColumnName)
        self.logger.log("last agg endpoint: {}".format(lastEndpoint))

        if idColumnName != "":
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = (
                'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON '
                '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE '
                '"{1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, '
                "{3} ASC"
            ).format(
                self.tables[dataType], self.tables[aggDataType], timeColumnName, idColumnName, lastEndpoint
            )
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = (
                'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}='
                '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' '
                "ORDER BY {2} ASC"
            ).format(self.tables[dataType], self.tables[aggDataType], timeColumnName, lastEndpoint)

        # The id column value is selected in the subkey case but is not being
        # used here.

        # @todo Exclude last endpoint if it is equal to the last
        # aggregation endpoint.
        #
        # The minute position filtering may be including the last
        # endpoint incorrectly because there are readings occurring
        # within the same minute as the final endpoint, e.g. 23:45:04,
        # 23:45:08, etc.
        #
        # This is not a problem with eGauge data due to reading intervals
        # being every minute and zero seconds.

        # Keep only timestamps on an interval boundary, truncated to
        # (year, month, day, hour, minute).
        minuteTuples = [
            row[0].timetuple()[0:5]
            for row in self.rows(sql)
            if row[0].timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0
        ]

        # groupby collapses consecutive readings that fall in the same minute.
        return [datetime(k[0], k[1], k[2], k[3], k[4], 0) for k, _ in groupby(minuteTuples)]

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Intervals are at 0, 15, 30, 45 min.
        The interval size is determined by MECO source data.

        When a crossing is detected the stored next-crossing minute for the
        subkey (or for the subkey-less series) is advanced by one interval,
        wrapping to 0 at the top of the hour.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        :raises Exception: when minute is not defined.
        """

        if not minute and minute != 0:
            raise Exception("Minute not defined.")

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            if (
                minute >= self.nextMinuteCrossing[subkey]
                and minute <= last
                and self.nextMinuteCrossing[subkey] != first
            ):
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log("minute crossed at #1.", "debug")
                return True
            elif self.nextMinuteCrossing[subkey] == first and minute >= first and minute <= intervalSize:
                # Top-of-hour boundary: the next crossing wrapped to 0 and the
                # reading is in the first interval of the new hour.
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log("minute crossed at #2.", "debug")
                return True
            return False
        else:
            if (
                minute >= self.nextMinuteCrossingWithoutSubkeys
                and minute <= last
                and self.nextMinuteCrossingWithoutSubkeys != first
            ):
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log("minute crossed at #3.", "debug")
                return True
            elif self.nextMinuteCrossingWithoutSubkeys == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log("minute crossed at #4.", "debug")
                return True
            return False

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log("sql: {}".format(sql), "debug")
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self, dataType="", orderBy=None, timestampCol="", startDate="", endDate=""):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list of column names; empty names are dropped.
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderColumns = [col for col in orderBy if col]

        return self.rows(
            """SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND '{}' ORDER BY {}""".format(
                self.columns[dataType],
                self.tables[dataType],
                timestampCol,
                startDate,
                endDate,
                ",".join(orderColumns),
            )
        )

    def subkeys(self, dataType="", timestampCol="", subkeyCol="", startDate="", endDate=""):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0]
            for sk in self.rows(
                """SELECT DISTINCT({}) FROM "{}" WHERE {} BETWEEN '{}' AND '{}' ORDER BY {}""".format(
                    subkeyCol, self.tables[dataType], timestampCol, startDate, endDate, subkeyCol
                )
            )
        ]

    @staticmethod
    def _sqlLiteral(val):
        """
        Render one aggregated value as a SQL literal.

        :param val: "NULL", None, a string, a datetime or a number.
        :returns: String that can be placed in a VALUES list.
        """

        if val is None:
            return "NULL"
        # Both byte and unicode strings are quoted; unicode values used to be
        # appended without quotes.
        if isinstance(val, (type(""), type(u""))):
            if val == "NULL":
                return val
            # Double embedded single quotes so the literal stays well formed.
            return "'" + val.strip().replace("'", "''") + "'"
        if isinstance(val, datetime):
            return "'" + val.isoformat() + "'"
        # Numbers and any remaining types; appending a non-string directly
        # used to raise a TypeError.
        return str(val)

    def _insertRow(self, agg, values):
        """
        Perform insert of one row of aggregated data using the given values.

        :param agg: MSGAggregatedData
        :param values: String containing values to be inserted.
        :raises Exception: when the insert fails and exitOnError is set.
        """

        sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
            self.tables[agg.aggregationType], ",".join(agg.columns), values
        )
        self.logger.log("sql: {}".format(sql), "debug")
        success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=self.exitOnError)

        # Used for a special case where data is reloaded.
        if self.commitOnEveryInsert:
            self.conn.commit()
        if not success and self.exitOnError:
            raise Exception("Failure during aggregated data insert.")

    def insertAggregatedData(self, agg=None):
        """
        Insert aggregated rows to the table for the aggregation type and
        commit.

        Each row is either a list of values or a dict of {subkey: values}.

        :param agg: MSGAggregatedData
        :return: None
        :raises Exception: when columns or data are missing, when a row is
        neither a dict nor a list, or when an insert fails and exitOnError
        is set.
        """

        if not agg.columns:
            raise Exception("agg columns not defined.")
        if not agg.data:
            raise Exception("agg data not defined.")

        self.logger.log("agg data: {}".format(agg.data))
        self.logger.log("agg data type: {}".format(type(agg.data)))

        for row in agg.data:
            if isinstance(row, dict):
                valueLists = [row[key] for key in row.keys()]
            elif isinstance(row, list):
                valueLists = [row]
            else:
                self.logger.log("row = {}".format(row), "error")
                raise Exception("Row type not matched.")

            for vals in valueLists:
                self._insertRow(agg, ",".join(self._sqlLiteral(val) for val in vals))

        self.conn.commit()

    def intervalAverages(self, sums, cnts, timestamp, timestampIndex, subkeyIndex=None, subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list, or dict of lists keyed by subkey
        :param cnts: list, or dict of lists keyed by subkey
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}, or as a
        plain list when there is no subkey. Columns without any counted value
        are reported as the string "NULL".
        """

        hasSubkey = subkey is not None
        if hasSubkey:
            self.logger.log("key: {}".format(subkey), "debug")

        columnSums = sums[subkey] if hasSubkey else sums
        columnCnts = cnts[subkey] if hasSubkey else cnts

        averages = []
        reportedAgg = False

        for index, total in enumerate(columnSums):
            if index == timestampIndex:
                averages.append(timestamp)
            elif hasSubkey and index == subkeyIndex:
                averages.append(subkey)
            elif columnCnts[index] != 0:
                if not reportedAgg:
                    self.logger.log("Aggregating {} rows of data.".format(columnCnts[index]), "debug")
                    reportedAgg = True
                # NOTE(review): with integer sums this is floor division under
                # Python 2 -- confirm the raw columns are float or Decimal.
                averages.append(total / columnCnts[index])
            else:
                averages.append("NULL")

        if hasSubkey:
            return {subkey: averages}
        return averages

    def dataParameters(self, dataType=""):
        """
        Parameters for a given data type.

        :param dataType: string
        :return: (aggType, timeColName, subkeyColName), or None (after
        logging) when the data type is not matched.
        """

        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except (KeyError, TypeError, AssertionError):
            # Only lookup failures are treated as an unmatched type; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            self.logger.log("Unmatched data type {}.".format(dataType))

    def aggregateAllData(self, dataType=""):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.

        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName, dataType=dataType):
            self.logger.log("start, end: {}, {}".format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime("%Y-%m-%d %H:%M:%S"),
                endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
            )
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log("aggData row: {}".format(row))

    def aggregateNewData(self, dataType=""):
        """
        Convenience method for aggregating new data.

        :param dataType: String in the list of raw data types.
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        # The returned dict has the single key dataType.
        (end, start) = self.lastUnaggregatedAndAggregatedEndpoints(dataType)[dataType]

        self.logger.log(
            "datatype: {}; start, end: {}, {}; end type: {}".format(dataType, start, end, type(end)), "critical"
        )

        if end is None:
            # No available unaggregated endpoints results in an empty list
            # for type egauge. The reason this does not work for other types is
            # because the other types have fractional minute readings and the
            # fractional minute readings are not being handled completely but
            # this method is still capable of working without problem.
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        nextStart = self.incrementEndpoint(start)

        if nextStart >= end:
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=nextStart.strftime("%Y-%m-%d %H:%M:%S"),
            endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
        )
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log("aggData row: {}".format(row))
        self.logger.log("{} rows aggregated for {}.".format(len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.

        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint + 15 minutes.
        """

        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=""):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.

        The last unaggregated endpoint is None when there is none.

        :param dataType: string
        :return: dict with tuple.
        """

        self.logger.log("datatype {}".format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log("subkey colname {}".format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType, aggDataType=aggType, timeColumnName=timeColName, idColumnName=subkeyColName
        )
        self.logger.log("unagg endpoints: {}".format(unAggregatedEndpoints))

        return {
            dataType: (
                unAggregatedEndpoints[-1] if unAggregatedEndpoints else None,
                self.lastAggregationEndpoint(aggDataType=aggType, timeColumnName=timeColName),
            )
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.

        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """

        endpoints = {}
        for dataType in self.dataParams:
            # Each call yields a one-entry dict keyed by the data type.
            endpoints.update(self.lastUnaggregatedAndAggregatedEndpoints(dataType))
        return endpoints

    def monthStartsAndEnds(self, timeColumnName="", dataType=""):
        """
        Return first date and last date for the given **raw** data type for each
        month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log("datatype {}".format(dataType), "debug")
        (start, end) = self.rows(
            """SELECT MIN({}), MAX({}) FROM \"{}\"""".format(timeColumnName, timeColumnName, self.tables[dataType])
        )[0]
        self.logger.log("start {}".format(start))
        self.logger.log("end {}".format(end))

        # End time needs transforming in split dates to extend the end of the
        # day to 23:59:59.
        startEndDatesTransform = []
        for pair in self.timeUtil.splitDates(start, end):
            (year, month, day) = pair[1].timetuple()[0:3]
            endOfDay = datetime(year, month, day, 23, 59, 59)
            startEndDatesTransform.append((pair[0], self.incrementEndpoint(endOfDay)))

        return startEndDatesTransform

    @staticmethod
    def _nextMinuteCrossingFor(minute):
        """
        Interval boundary (minute of the hour) that follows the given minute.

        :param minute: int minute of the hour.
        :returns: 15, 30, 45, or 0 for the top of the next hour.
        :raises Exception: when the minute is greater than 59.
        """

        for boundary in (15, 30, 45):
            if minute <= boundary:
                return boundary
        if minute <= 59:
            return 0
        raise Exception("Unable to determine next minute crossing")

    def aggregatedData(
        self, dataType="", aggregationType="", timeColumnName="", subkeyColumnName="", startDate="", endDate=""
    ):
        """
        ***********************************************************************
        Provide aggregated data.
        ***********************************************************************

        Start and end dates are used to calculate interval crossings.

        Numeric columns are summed and counted per subkey (or overall when
        there are no subkeys); each time intervalCrossed() reports a crossing
        the interval averages are emitted and the sums and counts of that
        series are reset.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []

        # Resolve column positions once instead of splitting the column string
        # for every cell.
        columnNames = self.columns[dataType].split(",")
        columnCount = len(columnNames)
        timeIndex = columnNames.index(timeColumnName)

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(
                dataType=dataType,
                timestampCol=timeColumnName,
                subkeyCol=subkeyColumnName,
                startDate=startDate,
                endDate=endDate,
            )
        self.logger.log("subkeys: {}".format(mySubkeys), "debug")

        subkeyIndex = columnNames.index(subkeyColumnName) if mySubkeys else None

        def zeros():
            """One zeroed accumulator slot per column."""
            return [0] * columnCount

        if mySubkeys:
            sums = dict((key, zeros()) for key in mySubkeys)
            cnts = dict((key, zeros()) for key in mySubkeys)
        else:
            sums = zeros()
            cnts = zeros()

        # The same raw rows serve both the crossing initialization and the
        # aggregation pass; the identical query used to be executed twice.
        rawRows = self.rawData(
            dataType=dataType,
            orderBy=[timeColumnName, subkeyColumnName],
            timestampCol=timeColumnName,
            startDate=startDate,
            endDate=endDate,
        )

        # Initialize the interval crossings from the first reading of each
        # series.
        subkeysToCheck = list(mySubkeys)
        self.logger.log("subkeys to check: {}".format(subkeysToCheck), "debug")

        if mySubkeys:
            for row in rawRows:
                # @CRITICAL: Exit after every subkey has been visited.
                # This scans the raw data until each subkey is encountered
                # ONCE and then exits.
                if not subkeysToCheck:
                    break
                key = row[subkeyIndex]
                if key in subkeysToCheck:
                    subkeysToCheck.remove(key)
                    minute = row[timeIndex].timetuple()[MINUTE_POSITION]
                    self.nextMinuteCrossing[key] = self._nextMinuteCrossingFor(minute)
                    self.logger.log(
                        "next min crossing for {} = {}".format(key, self.nextMinuteCrossing[key]), "debug"
                    )
        else:
            # Non-subkey case e.g. weather data. Only the first row matters.
            for row in rawRows:
                minute = row[timeIndex].timetuple()[MINUTE_POSITION]
                self.nextMinuteCrossingWithoutSubkeys = self._nextMinuteCrossingFor(minute)
                self.logger.log("next min crossing = {}".format(self.nextMinuteCrossingWithoutSubkeys), "debug")
                break

        for row in rawRows:
            minute = row[timeIndex].timetuple()[MINUTE_POSITION]

            if mySubkeys:
                key = row[subkeyIndex]

                for index in range(columnCount):
                    if self.mathUtil.isNumber(row[index]) and index != subkeyIndex:
                        sums[key][index] += row[index]
                        cnts[key][index] += 1

                if self.intervalCrossed(minute=minute, subkey=key):
                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log("key: {}".format(key), "debug")
                    aggData.append(
                        self.intervalAverages(sums, cnts, row[timeIndex], timeIndex, subkeyIndex, key)
                    )
                    self.logger.log("minute crossed {}".format(minute), "DEBUG")

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    sums[key] = zeros()
                    cnts[key] = zeros()
            else:
                for index in range(columnCount):
                    if self.mathUtil.isNumber(row[index]):
                        sums[index] += row[index]
                        cnts[index] += 1

                if self.intervalCrossed(minute=minute):
                    aggData.append(self.intervalAverages(sums, cnts, row[timeIndex], timeIndex))
                    sums = zeros()
                    cnts = zeros()

        self.logger.log("aggdata = {}".format(aggData), "debug")
        return MSGAggregatedData(aggregationType=aggregationType, columns=columnNames, data=aggData)
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        """
        Print the given parameter; the instance is callable.

        :param param: value to be printed.
        """

        print("CallableClass.__call__(%s)" % param)

    def insertData(self, conn, tableName, columnsAndValues, fKeyVal=None, withoutCommit=0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicated that the insert
        will not be immediately committed
        :returns: A database cursor.
        :raises KeyError: when a mapped column is missing from
        columnsAndValues for a table other than Register or Reading.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print("----------" + tableName + "----------")
            print(columnDict)
            print(columnsAndValues)

        # The Register and Reading tables need to handle NULL values as a
        # special case.
        missingBecomesNull = tableName in ('Register', 'Reading')

        for col, dbColName in columnDict.items():
            if col == '_pkey':
                # Use default as the value for the primary key so that the
                # private key is obtained from the predefined sequence.
                value = 'DEFAULT'
            elif col == '_fkey':
                # For the foreign key, set the value from the given parameter.
                value = fKeyVal
            elif missingBecomesNull:
                # Only a missing source value maps to NULL. The lookup is kept
                # apart from the debug print so that a print failure cannot
                # silently replace a real value with NULL.
                try:
                    value = columnsAndValues[col]
                except KeyError:
                    value = 'NULL'
            else:
                # For all other cases, simply pass the value.
                value = columnsAndValues[col]

            if VISUALIZE_DATA:
                # DB col name followed by the data source value.
                print("%s %s" % (dbColName, value))

            dbColsAndVals[dbColName] = value

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for dbColName, value in dbColsAndVals.items():
            cols.append(dbColName)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if value in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(value)
            else:
                # Surround value with single quotes.
                vals.append("'%s'" % value)

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
            tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except Exception:
                # A failed commit is logged rather than raised, as before;
                # interpreter-exit signals are no longer swallowed.
                self.logger.log("ERROR: Commit failed.", 'debug')

        return cur
class MSGDBUtilTester(unittest.TestCase):
    """
    Unit tests for MECO DB Utils.

    These tests connect to the testing database.
    """

    def setUp(self):
        self.i = MECODBInserter()

        # Connect to the testing database.
        self.connector = MSGDBConnector(testing = True)
        self.conn = self.connector.connectDB()
        # Set by testLastSequenceNumberIsCorrect so that tearDown can delete
        # the record that was inserted.
        self.lastSeqVal = None

        # Does this work having the dictCur be in another class?
        self.dictCur = self.connector.dictCur

        self.cursor = self.conn.cursor()
        self.deleter = MECODBDeleter()
        self.tableName = 'MeterData'
        self.columnName = 'meter_data_id'
        self.configer = MSGConfiger()
        self.logger = MSGLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def _assertConnectedToTestingDB(self):
        """
        Log the connected DB name and assert that it is the testing DB.
        """

        dbName = self.dbUtil.getDBName(self.cursor)[0]
        self.logger.log("DB name is %s" % dbName, 'info')
        self.assertEqual(dbName, "test_meco",
                         "Testing DB name should be set correctly.")

    def testMECODBUtilCanBeInited(self):
        self.assertIsNotNone(self.dbUtil)

    def testLastSequenceNumberIsCorrect(self):
        """
        Test if last sequence ID value is generated correctly. Do this by
        inserting and deleting a DB record.
        """

        # Insert some values.
        sampleDict = {'MeterName': '100001', 'UtilDeviceID': '100001',
                      'MacID': '00:00:00:00:00:00:00:00'}

        self.i.insertData(self.conn, self.tableName, sampleDict)

        self.lastSeqVal = self.dbUtil.getLastSequenceID(self.conn,
                                                        self.tableName,
                                                        self.columnName)

        print("lastSeqVal = %s" % self.lastSeqVal)

        # Read back the row that carries the reported sequence value.
        sql = """SELECT * FROM "%s" WHERE %s = %s""" % (
            self.tableName, self.columnName, self.lastSeqVal)
        dictCur = self.connector.dictCur
        self.dbUtil.executeSQL(dictCur, sql)
        row = dictCur.fetchone()

        meterDataID = row[self.columnName]
        self.assertEqual(self.lastSeqVal, meterDataID)

    def testGetDBName(self):
        self._assertConnectedToTestingDB()

    def testEraseTestingDatabase(self):
        """
        Test that calls to eraseTestMeco() work correctly.
        """

        # Confirm the testing DB is connected before erasing.
        self._assertConnectedToTestingDB()

        self.dbUtil.eraseTestMeco()

        # Check all of the tables for the presence of records.
        for table in self.configer.insertTables:
            sql = """select count(*) from "%s";""" % table
            self.dbUtil.executeSQL(self.dictCur, sql)
            row = self.dictCur.fetchone()
            self.assertEqual(row[0], 0,
                             "No records should be present in the %s table."
                             % table)

    def testColumns(self):
        """
        Test the ability to retrieve the column names from a database.
        """

        print(self.dbUtil.columns(self.cursor, 'Event'))

    def tearDown(self):
        """
        Delete the record that was inserted.
        """

        if self.lastSeqVal is not None:
            self.deleter.deleteRecord(self.conn, self.tableName,
                                      self.columnName, self.lastSeqVal)

        self.connector.closeDB(self.conn)
class MSGNOAAWeatherDataInserter(object):
    """
    Performs weather data insertion to a database.
    """

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: True if testing mode is being used.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.dbUtil = MSGDBUtil()
        self.dupeChecker = MSGWeatherDataDupeChecker()

    def insertDataDict(self, conn, tableName, listOfDataDicts, commit = False):
        """
        Given a table name and a list of dictionaries of column names and
        values, insert each dictionary to the db as one row.

        Rows for which the dupe checker reports an existing record are
        dropped. A 'created' entry is added to every given dictionary. The
        process exits with status -1 when an insert fails.

        :param conn: A database connection.
        :param tableName: Name of the DB table to be inserted to.
        :param listOfDataDicts: List of dictionaries of columns and values to
        be inserted to the DB. Each needs the keys wban, datetime
        ("%Y-%m-%d %H:%M") and record_type.
        :param (optional) commit: A flag indicated that DB transactions will
        be committed.
        :returns: Set of datetimes processed.
        """

        cur = conn.cursor()
        processedDateTimes = set()

        for row in listOfDataDicts:

            # Add a creation timestamp using the SQL function.
            row['created'] = 'NOW()'

            # Prepare the columns and values for insertion via SQL. Each
            # value is surrounded with single quotes, except for NULL values.
            cols = list(row.keys())
            vals = [
                "'%s'" % row[col] if row[col] != 'NULL' else "%s" % row[col]
                for col in cols
            ]

            sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                tableName, ','.join(cols), ','.join(vals))

            if self.dupeChecker.duplicateExists(cur, row['wban'],
                                                row['datetime'],
                                                row['record_type']):
                self.logger.log("Dupe found, dropping dupe.", 'info')
                continue

            processedDateTimes.add(
                dt.datetime.strptime(row['datetime'], "%Y-%m-%d %H:%M"))

            if self.dbUtil.executeSQL(cur, sql, exitOnFail = False) is False:
                # An error occurred. Dump the offending row before exiting.
                for col in sorted(row.keys()):
                    print("%s: %s" % (col, row[col]))
                sys.exit(-1)

        if commit:
            try:
                conn.commit()
            except Exception:
                # A failed commit is logged rather than raised, as before;
                # interpreter-exit signals are no longer swallowed.
                self.logger.log("ERROR: Commit failed.", 'debug')

        return processedDateTimes
class MSGWeatherDataUtil(object):
    """
    Utility methods for working with weather data.
    """

    def __init__(self):
        """
        Constructor.

        A database connection is not maintained here to keep this class
        lightweight. This results in the class not having a parameter for
        TESTING MODE.

        Constructing an instance fetches the remote file listing, because
        fillFileListAndDateList() is called here.
        """

        self.logger = MSGLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.url = self.configer.configOptionValue('Weather Data',
                                                   'weather_data_url')
        self.pattern = self.configer.configOptionValue('Weather Data',
                                                       'weather_data_pattern')
        self.fileList = []
        self.dateList = []  # List of dates corresponding weather data files.
        self.fillFileListAndDateList()
        self.dbUtil = MSGDBUtil()

    def fillFileListAndDateList(self):
        """
        Fill self.fileList and self.dateList from the listing of weather
        files obtained from the remote server used in processing weather data.

        Nothing is returned; the two lists are appended to in place.
        """

        response = urllib2.urlopen(self.url)
        try:
            listing = response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

        for match in re.findall(self.pattern, listing):
            # NOTE(review): each match is indexed with [0], so the configured
            # pattern presumably contains groups (findall then yields tuples)
            # -- confirm against the configuration.
            self.fileList.append(match[0])
            self.dateList.append(self.datePart(filename = match[0]))

    def datePart(self, filename = None, datetime = None):
        """
        Return the date part of a NOAA weather data filename.

        :param filename: The filename. The substrings QCLCD and .zip are
        removed from it.
        :param datetime: A datetime object. It is formatted as YYYY-MM-DD.
        :returns: The date part of the given parameter, or None when neither
        parameter is given.
        """

        assert filename is None or datetime is None, "One argument is allowed."

        if filename:
            return filename.replace("QCLCD", '').replace(".zip", '')

        if datetime:
            return datetime.strftime('%Y-%m-%d')

        return None

    def getLastDateLoaded(self, cursor):
        """
        Return the last date of loaded weather data.

        :param cursor: DB cursor.
        :returns: Last date, or None when the weather data table has no rows.
        """

        sql = """select wban, datetime, record_type from "%s"
                 ORDER BY datetime desc limit 1""" % WEATHER_DATA_TABLE

        self.dbUtil.executeSQL(cursor, sql)
        row = cursor.fetchone()

        if row is None:
            # Nothing has been loaded yet.
            return None

        return row[1]

    def getKeepList(self, fileList, cursor):
        """
        The Keep List is the list of filenames of files containing data that
        are *within* the month of the last loaded date or are beyond the last
        loaded date.

        The last loaded date is queried once, not once per file. When nothing
        has been loaded yet, every file is kept.

        :param fileList: A list of files containing weather data.
        :param cursor: DB cursor.
        :returns: List of weather data filenames to process, in the order in
        which they appear in fileList.
        """

        if not fileList:
            return []

        lastDate = self.getLastDateLoaded(cursor)
        self.logger.log('last date = %s' % lastDate)

        oneDay = dt.timedelta(days = 1)
        keepList = []

        for name in fileList:
            self.logger.log('Examining date %s.' % name)

            # The list date should be the last day of the month.
            # It is the date that is compared against the last retrieved date.
            firstOfMonth = dt.datetime.strptime(self.datePart(filename = name),
                                                "%Y%m")
            lastDay = calendar.monthrange(firstOfMonth.year,
                                          firstOfMonth.month)[1]
            listDate = firstOfMonth.replace(day = lastDay)
            self.logger.log('List date = %s.' % listDate)

            # listDate is midnight at the start of the last day, so the whole
            # of that day has to count as being within the month.
            if lastDate is None or lastDate < listDate + oneDay:
                keepList.append(name)

        return keepList
class MECODupeChecker(object):
    """
    Check for duplicate data in the database.

    self.currentReadingID carries state between calls: it is set by
    readingBranchDupeExists() when a duplicate is found and is read by
    readingValuesAreInTheDatabase().
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mecoConfig = MSGConfiger()
        self.currentReadingID = 0  # 0 means that no reading has been matched.
        self.dbUtil = MSGDBUtil()

    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
        rows = (element1, element2, element3)
        getLastElement(rows) # return element3

        :param rows Result rows from a query
        :return last element in the collection, or None if it is empty
        """

        last = None
        for last in rows:
            pass
        return last

    def _fetchRows(self, conn, sql):
        """
        Execute a query on a new cursor and return all of its rows.

        :param conn: Database connection.
        :param sql: SQL text to be executed.
        :return: All rows of the result.
        """

        dbCursor = conn.cursor()
        self.dbUtil.executeSQL(dbCursor, sql)
        return dbCursor.fetchall()

    def eventBranchDupeExists(self, conn, meterName, eventTime):
        """
        Determine if an event branch duplicate exists for a given meter
        name, event time tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param eventTime: Timestamp of event.
        :return: True if tuple exists, False if not.
        """

        sql = """SELECT "Event".event_time,
                        "MeterData".meter_data_id,
                        "EventData".event_data_id
                 FROM ( ( "MeterData" JOIN "EventData" ON (
                     ( "MeterData".meter_data_id = "EventData"
                     .meter_data_id ) ) )
                     JOIN "Event" ON ( ( "EventData".event_data_id = "Event"
                     .event_data_id ) ) )
                 WHERE "MeterData".meter_name = '%s'
                 AND "Event".event_time = '%s' """ % (meterName, eventTime)

        return len(self._fetchRows(conn, sql)) > 0

    def registerBranchDupeExists(self, conn, meterName, readTime,
                                 registerNumber, DEBUG=False):
        """
        Determine if a register branch duplicate exists for a given meter
        name, read time, number tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param readTime: Read time in RegisterRead table.
        :param registerNumber: Corresponds to DB column "number".
        :param DEBUG: Accepted for interface compatibility; not read here.
        :return: True if tuple exists, False if not.
        """

        sql = """SELECT "public"."MeterData".meter_name,
                        "public"."RegisterRead".read_time,
                        "public"."Register"."number"
                 FROM "public"."MeterData"
                 INNER JOIN "public"."RegisterData" ON "public"
                 ."MeterData".meter_data_id = "public"
                 ."RegisterData".meter_data_id
                 INNER JOIN "public"."RegisterRead" ON
                 "public"."RegisterData"
                 .register_data_id = "public"
                 ."RegisterRead".register_data_id
                 INNER JOIN "public"."Tier" ON "public"."RegisterRead"
                 .register_read_id = "public"."Tier"
                 .register_read_id
                 INNER JOIN "public"."Register" ON
                 "public"."Tier".tier_id = "public"."Register".tier_id
                 WHERE "public"."MeterData".meter_name = '%s'
                 AND "public"."RegisterRead".read_time = '%s'
                 AND "public"."Register".number = '%s'
                 """ % (meterName, readTime, registerNumber)

        return len(self._fetchRows(conn, sql)) > 0

    def readingBranchDupeExists(self, conn, meterName, endTime, channel=None,
                                DEBUG=False):
        """
        Duplicate cases:
        1. Tuple (meterID, endTime) exists in the database.
        @DEPRECATED in favor of (2), full meterName-endTime-channel query.

        2. Tuple (meterID, endTime, channel) exists in the database.

        When a duplicate is found, self.currentReadingID is set to the last
        column of the matching row. That is the reading ID for the channel
        query, but the meter data ID for the deprecated query.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param endTime: End time in Interval table.
        :param channel: Required parameter that was previously optional. An
        optional channel is now deprecated.
        :param DEBUG: Print the name of this method when True.
        :return: True if tuple exists, False if not.
        """

        if DEBUG:
            print("readingBranchDupeExists():")

        if channel != None:
            sql = """SELECT "Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id,
                            "Reading".channel,
                            "Reading".reading_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id = "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time = '%s' and meter_name = '%s' and
                     channel = '%s'""" % (endTime, meterName, channel)
        else:
            # deprecated query
            sql = """SELECT "Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id = "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     WHERE "Interval".end_time = '%s' and
                     meter_name = '%s'""" % (endTime, meterName)

        rows = self._fetchRows(conn, sql)

        if len(rows) == 0:
            self.logger.log(
                "Found no rows for meter %s, end time %s, channel %s." % (
                    meterName, endTime, channel), 'silent')
            return False

        assert len(rows) < 2, "Dupes should be less than 2, found %s: %s." % (
            len(rows), rows)

        # Remember the matched row's ID for readingValuesAreInTheDatabase().
        self.currentReadingID = self.getLastElement(rows[0])
        self.logger.log('Reading ID = %s.' % self.currentReadingID, 'silent')
        self.logger.log(
            "Duplicate found for meter %s, end time %s, channel %s." % (
                meterName, endTime, channel), 'silent')
        return True

    def readingValuesAreInTheDatabase(self, conn, readingDataDict):
        """
        Given a reading ID, verify that the values associated are present
        in the database.

        The reading ID is taken from self.currentReadingID. When it is 0 the
        method returns False without querying the database.

        Values are from the columns:
        1. channel
        2. raw_value
        3. uom
        4. value

        :param conn: Database connection.
        :param readingDataDict: dictionary containing reading values under
        the keys 'Channel', 'RawValue', 'UOM' and 'Value'
        :return True if the existing values are the same, otherwise return
        False
        """

        if self.currentReadingID == 0:
            # No reading has been matched, so there is nothing to compare.
            return False

        sql = """SELECT "Reading".reading_id,
                        "Reading".channel,
                        "Reading".raw_value,
                        "Reading".uom,
                        "Reading"."value"
                 FROM "Reading"
                 WHERE "Reading".reading_id = %s""" % (self.currentReadingID)

        rows = self._fetchRows(conn, sql)

        assert len(
            rows) == 1, "Didn't find a matching reading for reading ID %s." % \
                        self.currentReadingID

        self.logger.log("Found %s existing matches." % len(rows), 'silent')

        existing = rows[0]
        allEqual = True

        # Every column is compared, even after a mismatch, so that each
        # difference gets logged.
        if int(readingDataDict['Channel']) == int(existing[1]):
            print("channel equal,")
        else:
            self.logger.log("channel not equal: %s,%s,%s" % (
                int(readingDataDict['Channel']), int(existing[1]),
                readingDataDict['Channel'] == existing[1]), 'debug')
            allEqual = False

        if int(readingDataDict['RawValue']) == int(existing[2]):
            print("raw value equal,")
        else:
            self.logger.log("rawvalue not equal: %s,%s,%s" % (
                int(readingDataDict['RawValue']), int(existing[2]),
                readingDataDict['RawValue'] == existing[2]), 'debug')
            allEqual = False

        if readingDataDict['UOM'] == existing[3]:
            print("uom equal,")
        else:
            self.logger.log("uom not equal: %s,%s,%s" % (
                readingDataDict['UOM'], existing[3],
                readingDataDict['UOM'] == existing[3]), 'debug')
            allEqual = False

        # Floating point values are compared with a tolerance.
        if self.approximatelyEqual(float(readingDataDict['Value']),
                                   float(existing[4]), 0.001):
            self.logger.log("value equal", 'silent')
        else:
            self.logger.log("value not equal: %s,%s,%s" % (
                float(readingDataDict['Value']), float(existing[4]),
                readingDataDict['Value'] == existing[4]), 'debug')
            allEqual = False

        return allEqual

    def approximatelyEqual(self, a, b, tolerance):
        """
        Compare two numbers using an absolute tolerance.

        :param a: First number.
        :param b: Second number.
        :param tolerance: Exclusive upper bound for the absolute difference.
        :return: True if a and b differ by less than tolerance.
        """

        return abs(a - b) < tolerance
class MSGDBUtilTester(unittest.TestCase):
    """
    Unit tests for MECO DB Utils.

    Every test uses the connection that MSGDBConnector(testing=True)
    provides.
    """

    def setUp(self):
        """
        Create the objects shared by the tests and connect to the testing
        database.
        """

        self.i = MECODBInserter()

        # Connect to the testing database.
        self.connector = MSGDBConnector(testing=True)
        self.conn = self.connector.connectDB()

        # Stays None unless a test inserts a record; see tearDown().
        self.lastSeqVal = None

        # Does this work having the dictCur be in another class?
        self.dictCur = self.connector.dictCur

        self.cursor = self.conn.cursor()
        self.deleter = MECODBDeleter()

        # Table and key column used by the insert-and-delete test.
        self.tableName = 'MeterData'
        self.columnName = 'meter_data_id'

        self.configer = MSGConfiger()
        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def _assertTestingDBName(self):
        """
        Log the name of the connected database and assert that it is the
        testing database.
        """

        dbName = self.dbUtil.getDBName(self.cursor)[0]
        self.logger.log("DB name is %s" % dbName, 'info')
        self.assertEqual(dbName, "test_meco",
                         "Testing DB name should be set correctly.")

    def testMECODBUtilCanBeInited(self):
        """
        The DB util object created in setUp() should exist.
        """

        self.assertIsNotNone(self.dbUtil)

    def testLastSequenceNumberIsCorrect(self):
        """
        Test if last sequence ID value is generated correctly. Do this by
        inserting and deleting a DB record.

        The record is deleted in tearDown() using the sequence value stored
        here.
        """

        # Insert some values.
        self.i.insertData(self.conn, self.tableName, {
            'MeterName': '100001',
            'UtilDeviceID': '100001',
            'MacID': '00:00:00:00:00:00:00:00',
        })

        self.lastSeqVal = self.dbUtil.getLastSequenceID(
            self.conn, self.tableName, self.columnName)
        print("lastSeqVal = %s" % self.lastSeqVal)

        # Read the record back through its key.
        sql = """SELECT * FROM "%s" WHERE %s = %s""" % (
            self.tableName, self.columnName, self.lastSeqVal)
        cur = self.connector.dictCur
        self.dbUtil.executeSQL(cur, sql)
        fetched = cur.fetchone()

        self.assertEqual(self.lastSeqVal, fetched[self.columnName])

    def testGetDBName(self):
        """
        The name reported for the connected database should be that of the
        testing database.
        """

        self._assertTestingDBName()

    def testEraseTestingDatabase(self):
        """
        Test that calls to eraseTestMeco() work correctly.
        """

        self._assertTestingDBName()

        self.dbUtil.eraseTestMeco()

        # Check all of the tables for the presence of records.
        for name in self.configer.insertTables:
            self.dbUtil.executeSQL(self.dictCur,
                                   """select count(*) from "%s";""" % name)
            counted = self.dictCur.fetchone()
            self.assertEqual(
                counted[0], 0,
                "No records should be present in the %s table." % name)

    def testColumns(self):
        """
        Test the ability to retrieve the column names from a database.

        The column names of the Event table are printed; nothing is asserted.
        """

        print(self.dbUtil.columns(self.cursor, 'Event'))

    def tearDown(self):
        """
        Delete the record that was inserted, if there is one, and close the
        database connection.
        """

        if self.lastSeqVal is not None:
            self.deleter.deleteRecord(self.conn, self.tableName,
                                      self.columnName, self.lastSeqVal)

        self.connector.closeDB(self.conn)