def test_log_successful_export(self):
        """
        Test logging of export results to the export history table.
        """
        # @REVIEWED
        self.assertTrue(
            self.exporter.logSuccessfulExport(name='test_export',
                                              url='http://test_url',
                                              datetime=0, size=100))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()

        self.assertTrue(
            dbUtil.executeSQL(cursor, 'select * from "ExportHistory" where '
                                      'timestamp = to_timestamp(0)'))

        self.assertEqual(len(cursor.fetchall()), 1,
                         "There should only be one result row.")

        self.assertTrue(
            dbUtil.executeSQL(cursor, 'delete from "ExportHistory" where '
                                      'timestamp = to_timestamp(0)'))
        conn.commit()
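The test above assembles its SQL by string concatenation. As a minimal sketch, the same checks can be written with psycopg2's parameter binding (the standard cursor.execute(sql, params) form), assuming the MSGDBConnector API shown here:

conn = MSGDBConnector().connectDB()
cursor = conn.cursor()

# Bind the epoch value server-side instead of splicing it into the string.
cursor.execute(
    'SELECT * FROM "ExportHistory" WHERE timestamp = to_timestamp(%s)', (0,))
assert len(cursor.fetchall()) == 1

cursor.execute(
    'DELETE FROM "ExportHistory" WHERE timestamp = to_timestamp(%s)', (0,))
conn.commit()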
Example #2
class MECODBReader(object):
    """
    Read records from a database.
    """

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: True if in testing mode.
        """

        self.connector = MSGDBConnector(testing)
        self.conn = self.connector.connectDB()
        self.dbUtil = MSGDBUtil()
        self.dbName = self.dbUtil.getDBName(self.connector.dictCur)

    def selectRecord(self, conn, table, keyName, keyValue):
        """
        Read a record in the database given a table name, primary key name,
        and value for the key.

        :param conn: DB connection.
        :param table: DB table name.
        :param keyName: DB column name for primary key.
        :param keyValue: Value to be matched.
        :returns: Row containing record data.
        """

        print "selectRecord:"
        sql = """SELECT * FROM "%s" WHERE %s = %s""" % (
        table, keyName, keyValue)
        dcur = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(dcur, sql)
        row = dcur.fetchone()
        return row

    def readingAndMeterCounts(self):
        """
        Retrieve the reading and meter counts.

        :returns: Multiple lists containing the retrieved data.
        """

        sql = """SELECT "Day", "Reading Count",
        "Meter Count" FROM count_of_readings_and_meters_by_day"""
        dcur = self.conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(dcur, sql)
        rows = dcur.fetchall()

        dates = []
        meterCounts = []
        readingCounts = []

        for row in rows:
            dates.append(row[0])
            # Note: this records readings per meter, not the raw reading count.
            readingCounts.append(row[1] / row[2])
            meterCounts.append(row[2])

        return dates, readingCounts, meterCounts
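A minimal usage sketch for MECODBReader, assuming the database configuration resolves; the table and key names below are only illustrative of the selectRecord() call signature:

reader = MECODBReader(testing=True)

# Fetch one record by primary key.
row = reader.selectRecord(reader.conn, 'MeterData', 'meter_data_id', 1)

# Per-day aggregates; readingCounts holds readings per meter, not raw counts.
dates, readingCounts, meterCounts = reader.readingAndMeterCounts()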
Example #3
    def test_log_successful_export(self):
        """
        Test logging of export results to the export history table.
        """
        # @REVIEWED
        self.assertTrue(
            self.exporter.logSuccessfulExport(name='test_export',
                                              url='http://test_url',
                                              datetime=0,
                                              size=100))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()

        self.assertTrue(
            dbUtil.executeSQL(
                cursor, 'select * from "ExportHistory" where '
                'timestamp = to_timestamp(0)'))

        self.assertEqual(len(cursor.fetchall()), 1,
                         "There should only be one result row.")

        self.assertTrue(
            dbUtil.executeSQL(
                cursor, 'delete from "ExportHistory" where '
                'timestamp = to_timestamp(0)'))
        conn.commit()
class MECODBDeleter(object):
    """
    Provide delete routines for MECO DB.
    """

    def __init__(self):
        """
        Constructor.
        """
        self.dbUtil = MSGDBUtil()


    def deleteRecord(self, conn, tableName, idText, idValue):
        """
        Delete record from DB where record has an int-based serial number.

        :param tableName: DB table name.
        :param idText: DB column name for record ID.
        :param idValue: Value of the ID to be deleted.
        """

        sql = """DELETE FROM "{}" where {} = {}""".format(tableName, idText,
                                                          idValue)
        dictCur = conn.cursor(cursor_factory = psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(dictCur, sql)
        conn.commit()
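# A hedged sketch of deleteRecord() with the row value bound as a psycopg2
# parameter; identifiers cannot be bound, so the table and column names
# must come from trusted code, as they do above.
def deleteRecordParameterized(conn, tableName, idText, idValue):
    sql = 'DELETE FROM "{}" WHERE {} = %s'.format(tableName, idText)
    cursor = conn.cursor()
    cursor.execute(sql, (idValue,))
    conn.commit()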
class MSGWeatherDataDupeChecker(object):
    """
    Determine if a duplicate record exists based on the tuple

    (WBAN, Date, Time, StationType).
    """

    def __init__(self, testing = False):
        """
        Constructor.

        :param testing: Flag for testing mode.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()


    def duplicateExists(self, dbCursor, wban, datetime, recordType):
        """
        Check for the existence of a duplicate record.

        :param dbCursor: Database cursor.
        :param wban: WBAN station identifier.
        :param datetime: Timestamp of the record.
        :param recordType: Type of the weather record.
        :returns: True if a duplicate record exists, otherwise False.
        """

        tableName = "WeatherNOAA"
        sql = """SELECT wban, datetime, record_type FROM \"%s\" WHERE
                 wban = '%s' AND datetime = '%s' AND record_type = '%s'""" % (
            tableName, wban, datetime, recordType)

        self.logger.log("sql=%s" % sql, 'debug')
        self.logger.log("wban=%s, datetime=%s, record_type=%s" % (
            wban, datetime, recordType), 'debug')

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0
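duplicateExists() fetches every matching row just to test for presence. A hedged sketch of the same check using SELECT EXISTS and bound parameters, which lets PostgreSQL stop at the first match (assuming the same "WeatherNOAA" table and a psycopg2 cursor):

def duplicateExistsFast(dbCursor, wban, datetime, recordType):
    sql = ('SELECT EXISTS(SELECT 1 FROM "WeatherNOAA" WHERE wban = %s '
           'AND datetime = %s AND record_type = %s)')
    dbCursor.execute(sql, (wban, datetime, recordType))
    return dbCursor.fetchone()[0]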
Example #6
class MSGWeatherDataDupeChecker(object):
    """
    Determine if a duplicate record exists based on the tuple

    (WBAN, Date, Time, StationType).
    """
    def __init__(self, testing=False):
        """
        Constructor.

        :param testing: Flag for testing mode.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def duplicateExists(self, dbCursor, wban, datetime, recordType):
        """
        Check for the existence of a duplicate record.

        :param dbCursor: Database cursor.
        :param wban: WBAN station identifier.
        :param datetime: Timestamp of the record.
        :param recordType: Type of the weather record.
        :returns: True if a duplicate record exists, otherwise False.
        """

        tableName = "WeatherNOAA"
        sql = """SELECT wban, datetime, record_type FROM \"%s\" WHERE
                 wban = '%s' AND datetime = '%s' AND record_type = '%s'""" % (
            tableName, wban, datetime, recordType)

        self.logger.log("sql=%s" % sql, 'debug')
        self.logger.log(
            "wban=%s, datetime=%s, record_type=%s" %
            (wban, datetime, recordType), 'debug')

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0
def insertData(files, table, cols, testing = False):
	"""
	Insert aggregated data generated by this script into a database table.

	:param files: A list of the filenames to be processed.
	:param table: The name of the table in the DB.
	:param cols: A list of the columns (as strings) in the table.
	:param testing: Specify whether to use the testing database (False by default).
	"""
	connector = MSGDBConnector()
	conn = connector.connectDB()
	dbUtil = MSGDBUtil()
	cursor = conn.cursor()

	cnt = 0

	for file in files:

		with open(file, 'r') as csvfile:
			reader = csv.reader(csvfile, delimiter = ',')
			# Skip the header line.
			reader.next()
			for row in reader:
				sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
					table, ','.join(cols),
					','.join("'" + item.strip() + "'" for item in row))

				sql = sql.replace("'NULL'", 'NULL')

				dbUtil.executeSQL(cursor, sql)

				cnt += 1
				if cnt % 10000 == 0:
					conn.commit()

		conn.commit()
		cnt = 0
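insertData() issues one hand-quoted INSERT per row. A hedged sketch of the same load using psycopg2's executemany() with bound parameters, which also removes the manual quoting and the 'NULL' string replacement (None maps to SQL NULL):

import csv

def insertDataBatched(conn, table, cols, files):
    cursor = conn.cursor()
    sql = 'INSERT INTO "{}" ({}) VALUES ({})'.format(
        table, ','.join(cols), ','.join(['%s'] * len(cols)))
    for name in files:
        with open(name, 'r') as csvfile:
            reader = csv.reader(csvfile, delimiter=',')
            next(reader)  # Skip the header line.
            rows = [[None if item.strip() == 'NULL' else item.strip()
                     for item in row] for row in reader]
            cursor.executemany(sql, rows)
        conn.commit()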
Example #8
def insertData(files, table, cols):
    """
	Insert aggregated data generated by this script into a database table.

	:param files: A list of the filenames to be processed.
	:param table: The name of the table in the DB.
	:param cols: A list of the columns (as strings) in the table.
	:param testing: Specify whether to use test 
	"""

    connector = MSGDBConnector()
    conn = connector.connectDB()
    dbUtil = MSGDBUtil()
    cursor = conn.cursor()

    cnt = 0

    for file in files:

        with open(file, 'rb') as csvfile:
            myReader = csv.reader(csvfile, delimiter=',')
            # Skip the header line.
            myReader.next()
            for row in myReader:
                print row
                sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                    table, ','.join(cols), ','.join("'" + item.strip() + "'"
                                                    for item in row))

                sql = sql.replace("'NULL'", 'NULL')
                dbUtil.executeSQL(cursor, sql)
                cnt += 1
                if cnt % 10000 == 0:
                    conn.commit()

        conn.commit()
        cnt = 0
Example #9
class MECODBDeleter(object):
    """
    Provide delete routines for MECO DB.
    """
    def __init__(self):
        """
        Constructor.
        """
        self.dbUtil = MSGDBUtil()

    def deleteRecord(self, conn, tableName, idText, idValue):
        """
        Delete record from DB where record has an int-based serial number.

        :param tableName: DB table name.
        :param idText: DB column name for record ID.
        :param idValue: Value of the ID to be deleted.
        """

        sql = """delete from "%s" where %s = %s""" % (tableName, idText,
                                                      idValue)
        dictCur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        self.dbUtil.executeSQL(dictCur, sql)
        conn.commit()
    def countOfDBExports(self, since = None):
        """
        :param since: datetime indicating last export datetime.
        :return: Int of count of exports.
        """
        myDatetime = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M')
        if not since:
            since = myDatetime('1900-01-01 00:00')
        self.logger.log(since.strftime('%Y-%m-%d %H:%M'), 'DEBUG')

        sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
              '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
            since.strftime('%Y-%m-%d %H:%M'))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        rows = None
        if dbUtil.executeSQL(cursor, sql, exitOnFail = False):
            rows = cursor.fetchall()
        assert rows is not None and len(rows) == 1, 'Invalid return value.'
        return rows[0][0]
Example #11
    def countOfDBExports(self, since=None):
        """
        :param since: datetime indicating last export datetime.
        :return: Int of count of exports.
        """
        myDatetime = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M')
        if not since:
            since = myDatetime('1900-01-01 00:00')
        self.logger.log(since.strftime('%Y-%m-%d %H:%M'), 'DEBUG')

        sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
              '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
            since.strftime('%Y-%m-%d %H:%M'))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        rows = None
        if dbUtil.executeSQL(cursor, sql, exitOnFail=False):
            rows = cursor.fetchall()
        assert rows is not None and len(rows) == 1, 'Invalid return value.'
        return rows[0][0]
    def logSuccessfulExport(self, name = '', url = '', datetime = 0, size = 0):
        """
        When an export has been successful, log information about the export
        to the database.

        The items to log include:
        * filename
        * URL
        * timestamp
        * filesize

        :param name: String
        :param url: String
        :param datetime: Timestamp of the export; 0 maps to the epoch.
        :param size: Int
        :return: True if no errors occurred, else False.
        """

        def exportHistoryColumns():
            return ['name', 'url', 'timestamp', 'size']

        def timestamp(datetime):
            return 'to_timestamp(0)' if datetime == 0 else \
                "timestamp '{}'".format(datetime)

        sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
            self.configer.configOptionValue('Export', 'export_history_table'),
            ','.join(exportHistoryColumns()), "'" + name + "'", "'" + url + "'",
            timestamp(datetime), size)

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        result = dbUtil.executeSQL(cursor, sql, exitOnFail = False)
        conn.commit()
        return result
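The hand-quoting of name and url above can also be expressed with bound parameters. A minimal sketch for the epoch case, assuming the history table resolves to "ExportHistory" as in the tests at the top of this listing:

conn = MSGDBConnector().connectDB()
cursor = conn.cursor()
sql = ('INSERT INTO "ExportHistory" (name, url, timestamp, size) '
       'VALUES (%s, %s, to_timestamp(%s), %s)')
cursor.execute(sql, ('test_export', 'http://test_url', 0, 100))
conn.commit()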
Example #13
    def logSuccessfulExport(self, name='', url='', datetime=0, size=0):
        """
        When an export has been successful, log information about the export
        to the database.

        The items to log include:
        * filename
        * URL
        * timestamp
        * filesize

        :param name: String
        :param url: String
        :param datetime: Timestamp of the export; 0 maps to the epoch.
        :param size: Int
        :return: True if no errors occurred, else False.
        """
        def exportHistoryColumns():
            return ['name', 'url', 'timestamp', 'size']

        def timestamp(datetime):
            return 'to_timestamp(0)' if datetime == 0 else \
                "timestamp '{}'".format(datetime)

        sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
            self.configer.configOptionValue('Export', 'export_history_table'),
            ','.join(exportHistoryColumns()), "'" + name + "'",
            "'" + url + "'", timestamp(datetime), size)

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        result = dbUtil.executeSQL(cursor, sql, exitOnFail=False)
        conn.commit()
        return result
class MSGNOAAWeatherDataInserter(object):
    """
    Performs weather data insertion to a database.
    """
    def __init__(self, testing=False):
        """
        Constructor.
        :param testing: True if testing mode is being used.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.dbUtil = MSGDBUtil()
        self.dupeChecker = MSGWeatherDataDupeChecker()

    def insertDataDict(self, conn, tableName, listOfDataDicts, commit=False):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the db.

        :param conn: A database connection.
        :param tableName: Name of the DB table to be inserted to.
        :param listOfDataDicts: List of dictionaries mapping column names to
        values to be inserted.
        :param commit: Optional flag indicating that DB transactions will
        be committed.
        :returns: Set of datetimes processed.
        """

        cur = conn.cursor()
        processedDateTimes = set()

        for row in listOfDataDicts:

            # Add a creation timestamp using the SQL function.
            row['created'] = 'NOW()'

            cols = []
            vals = []

            for col in row.keys():
                # Prepare the columns and values for insertion via SQL.

                cols.append(col)
                if (row[col] != 'NULL'):
                    # Surround each value with single quotes...
                    vals.append("'%s'" % row[col])
                else:
                    # Except for NULL values.
                    vals.append("%s" % row[col])

            sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                tableName, ','.join(cols), ','.join(vals))

            if self.dupeChecker.duplicateExists(cur, row['wban'],
                                                row['datetime'],
                                                row['record_type']):
                self.logger.log("Dupe found, dropping dupe.", 'info')
            else:
                processedDateTimes.add(
                    dt.datetime.strptime(row['datetime'], "%Y-%m-%d %H:%M"))
                if self.dbUtil.executeSQL(cur, sql, exitOnFail=False) is False:
                    # An error occurred.
                    for col in sorted(row.keys()):
                        print "%s: %s" % (col, row[col])
                    sys.exit(-1)

        if commit:
            try:
                conn.commit()
            except Exception as error:
                self.logger.log("ERROR: Commit failed: {}".format(error),
                                'debug')

        return processedDateTimes
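# A hedged sketch of the same per-row insert with psycopg2 parameter
# binding: None maps to SQL NULL and NOW() stays in the SQL text, so the
# manual quoting above becomes unnecessary.
def insertRowParameterized(cur, tableName, row):
    cols = list(row.keys())
    sql = 'INSERT INTO "{}" ({}, created) VALUES ({}, NOW())'.format(
        tableName, ','.join(cols), ','.join(['%s'] * len(cols)))
    cur.execute(sql, [None if row[c] == 'NULL' else row[c] for c in cols])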
class MECODupeChecker(object):
    """
    Check for duplicate data in the database.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mecoConfig = MSGConfiger()
        self.currentReadingID = 0
        self.dbUtil = MSGDBUtil()


    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :return: Last element in the collection.
        """

        # Equivalent to the original index-scanning loop.
        return rows[-1] if rows else None

    def eventBranchDupeExists(self, conn, meterName, eventTime):
        """

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param eventTime: Timestamp of event.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Event".event_time,
                        "MeterData".meter_data_id,
                        "EventData".event_data_id
                 FROM ( ( "MeterData" JOIN "EventData" ON (
                        ( "MeterData".meter_data_id = "EventData"
                        .meter_data_id ) ) )
                 JOIN "Event" ON ( ( "EventData".event_data_id = "Event"
                 .event_data_id ) ) )
                 WHERE "MeterData".meter_name = '%s'
                 AND "Event".event_time = '%s' """ % (meterName, eventTime)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0


    def registerBranchDupeExists(self, conn, meterName, readTime,
                                 registerNumber, DEBUG = False):
        """
        Determine if a register branch duplicate exists for a given meter
        name, read time, number tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param readTime: Read time in RegisterRead table.
        :param registerNumber: Corresponds to DB column "number".
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "public"."MeterData".meter_name,
                        "public"."RegisterRead".read_time,
                        "public"."Register"."number"
                 FROM "public"."MeterData"
                 INNER JOIN "public"."RegisterData" ON
                      "public" ."MeterData".meter_data_id = "public"
                      ."RegisterData".meter_data_id
                 INNER JOIN "public"."RegisterRead" ON
                      "public"."RegisterData" .register_data_id = "public"
                      ."RegisterRead".register_data_id
                 INNER JOIN "public"."Tier" ON "public"."RegisterRead"
                 .register_read_id = "public"."Tier" .register_read_id
                 INNER JOIN "public"."Register" ON "public"."Tier".tier_id =
                 "public"."Register".tier_id
                 WHERE "public"."MeterData".meter_name = '%s'
                 AND "public"."RegisterRead".read_time = '%s'
                 AND "public"."Register".number = '%s'
                 """ % (meterName, readTime, registerNumber)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0


    def readingBranchDupeExists(self, conn, meterName, endTime, channel = None,
                                DEBUG = False):
        """
        Duplicate cases:
        1. Tuple (meterID, endTime) exists in the database.
        @DEPRECATED in favor of (2), full meterName-endTime-channel query.

        2. Tuple (meterID, endTime, channel) exists in the database.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param endTime: End time in Interval table.
        :param channel: Required parameter that was previously optional. An
        optional channel is now deprecated.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        if DEBUG:
            print "readingBranchDupeExists():"

        if channel is not None:
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id,
                            "Reading".channel,
                            "Reading".reading_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time = '%s' and meter_name = '%s' and
                     channel = '%s'""" % (
                endTime, meterName, channel)

        else:  # deprecated query
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     WHERE "Interval".end_time = '%s' and meter_name =
                     '%s'""" % (
                endTime, meterName)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            assert len(rows) < 2, \
                "Dupes should be less than 2, found %s: %s." % (len(rows), rows)

            self.currentReadingID = self.getLastElement(rows[0])
            self.logger.log('Reading ID = %s.' % self.currentReadingID,
                            'silent')

            self.logger.log(
                "Duplicate found for meter %s, end time %s, channel %s." % (
                    meterName, endTime, channel), 'silent')
            return True

        else:
            self.logger.log(
                "Found no rows for meter %s, end time %s, channel %s." % (
                    meterName, endTime, channel), 'silent')
            return False


    def readingValuesAreInTheDatabase(self, conn, readingDataDict):
        """
        Given a reading ID, verify that the values associated are present
        in the database.

        Values are from the columns:
            1. channel
            2. raw_value
            3. uom
            4. value

        :param readingDataDict: Dictionary containing reading values.
        :return: True if the existing values are the same, otherwise False.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Reading".reading_id,
                                "Reading".channel,
                                "Reading".raw_value,
                                "Reading".uom,
                                "Reading"."value"
                         FROM "Reading"
                         WHERE "Reading".reading_id = %s""" % (
            self.currentReadingID)

        if self.currentReadingID == 0:
            return False

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        assert len(rows) == 1, \
            "Didn't find a matching reading for reading ID %s." % \
            self.currentReadingID
        if len(rows) == 1:
            self.logger.log("Found %s existing matches." % len(rows), 'silent')

            allEqual = True
            if int(readingDataDict['Channel']) == int(rows[0][1]):
                print "channel equal,"
            else:
                self.logger.log("channel not equal: %s,%s,%s" % (
                    int(readingDataDict['Channel']), int(rows[0][1]),
                    readingDataDict['Channel'] == rows[0][1]), 'debug')
                allEqual = False

            if int(readingDataDict['RawValue']) == int(rows[0][2]):
                print "raw value equal,"
            else:
                self.logger.log("rawvalue not equal: %s,%s,%s" % (
                    int(readingDataDict['RawValue']), int(rows[0][2]),
                    readingDataDict['RawValue'] == rows[0][2]), 'debug')
                allEqual = False

            if readingDataDict['UOM'] == rows[0][3]:
                print "uom equal,"
            else:
                self.logger.log("uom not equal: %s,%s,%s" % (
                    readingDataDict['UOM'], rows[0][3],
                    readingDataDict['UOM'] == rows[0][3]), 'debug')
                allEqual = False

            if self.approximatelyEqual(float(readingDataDict['Value']),
                                       float(rows[0][4]), 0.001):
                self.logger.log("value equal", 'silent')
            else:
                self.logger.log("value not equal: %s,%s,%s" % (
                    float(readingDataDict['Value']), float(rows[0][4]),
                    readingDataDict['Value'] == rows[0][4]), 'debug')
                allEqual = False

            return allEqual
        else:
            return False


    def approximatelyEqual(self, a, b, tolerance):
        return abs(a - b) < tolerance
    ]

    lineCnt = 0

    with open(filename) as tsv:
        for line in csv.reader(tsv, delimiter="\t"):
            if lineCnt != 0:

                data = line[0:66]

                for i in range(0, 66):

                    if len(data[i]) == 0:
                        data[i] = "NULL"
                    else:
                        data[i] = "'" + data[i] + "'"

                sql = """INSERT INTO "MeterRecords" (%s) VALUES (%s)""" % (",".join(cols), ",".join(data))

                dbUtil.executeSQL(cur, sql)

            lineCnt += 1

    conn.commit()

    msg = "Processed %s lines.\n" % lineCnt
    sys.stderr.write(msg)
    msgBody += msg

    notifier.sendNotificationEmail(msgBody)
class NewDataAggregator(object):
    """
    Perform aggregation of new data for a set of predefined data types
    (self.rawTypes).
    """

    def __init__(self):
        """
        Constructor.
        """
        self.logger = MSGLogger(__name__, 'DEBUG')
        self.aggregator = MSGDataAggregator()
        self.notifier = MSGNotifier()
        self.rawTypes = ['weather', 'egauge', 'circuit', 'irradiance']
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()


    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: string indicating the type of the
        notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        cursor = self.cursor
        sql = """SELECT MAX("notificationTime") FROM "{}" WHERE
        "notificationType" = '{}'""".format(NOTIFICATION_HISTORY_TABLE,
                                            notificationType)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            raise Exception('Exception during getting last report date.')


    def sendNewDataNotification(self, result = None, testing = False):
        """
        Send a notification reporting on new data available since the last
        time new data was reported.

        :param result: list of dicts containing aggregation results as
        provided by MSGDataAggregator::aggregateNewData.
        :param testing: Use testing mode when True.
        """

        self.logger.log('result {}'.format(result), 'debug')

        lastReportDate = self.lastReportDate(NOTIFICATION_HISTORY_TYPE)

        if not lastReportDate:
            lastReportDate = "never"

        if not result:
            msgBody = '\nNew data has NOT been aggregated in {}. No result ' \
                      'was obtained. This is an error that should be ' \
                      'investigated.'.format(self.connector.dbName)
        else:
            msgBody = '\nNew data has been aggregated in {}.'.format(
                self.connector.dbName)
            msgBody += '\n\n'
            for resultDict in result:
                msgBody += 'The new data count for type {} is {} readings' \
                           '.\n'.format(resultDict.keys()[0],
                                        resultDict.values()[0])
            msgBody += '\n\n'
            msgBody += 'The last report date was %s.' % lastReportDate
            msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.saveNotificationTime()


    def saveNotificationTime(self):
        """
        Save a notification event to the notification history.
        """

        cursor = self.cursor
        sql = """INSERT INTO "{}" ("notificationType", "notificationTime")
        VALUES ('{}', NOW())""".format(NOTIFICATION_HISTORY_TABLE,
                                       NOTIFICATION_HISTORY_TYPE)
        success = self.dbUtil.executeSQL(cursor, sql)
        self.conn.commit()
        if not success:
            raise Exception('Exception while saving the notification time.')


    def aggregateNewData(self):
        """
        :return: list of dicts obtained from
        MSGDataAggregator::aggregateNewData.
        """

        result = map(self.aggregator.aggregateNewData, self.rawTypes)

        self.logger.log('result {}'.format(result))
        return result
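# A minimal usage sketch for NewDataAggregator, assuming the DB and
# notification configuration resolve: aggregate each raw type, then send
# the summary email and record the notification time.
aggregator = NewDataAggregator()
result = aggregator.aggregateNewData()
aggregator.sendNewDataNotification(result=result, testing=True)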
            if lineCnt != 0: # Skip header.
                data = line[0:len(cols)] # Overshoot columns to get the last column.

                for i in range(0, len(cols)):
                    if len(data[i]) == 0:
                        data[i] = 'NULL'
                    else:
                        # Escape single quotes with double single quotes in
                        # PostgreSQL.
                        data[i] = data[i].replace("'", "\'\'")
                        data[i] = "'" + data[i] + "'"

                sql = """INSERT INTO "MeterLocationHistory" (%s) VALUES (%s)""" % (
                    ','.join(cols), ','.join(data))
                logger.log("SQL: %s" % sql, 'debug')
                success = dbUtil.executeSQL(cur, sql)
                if not success:
                    anyFailure = True

            lineCnt += 1

    conn.commit()

    msg = ("Processed %s lines.\n" % lineCnt)
    sys.stderr.write(msg)
    msgBody += msg

    if not anyFailure:
        msg = "Finished inserting Meter Location History records.\n"
        sys.stderr.write(msg)
        msgBody += msg
Example #19
    lineCnt = 0

    with open(filename) as tsv:
        for line in csv.reader(tsv, delimiter="\t"):
            if lineCnt != 0:

                data = line[0:66]

                for i in range(0, 66):

                    if len(data[i]) == 0:
                        data[i] = 'NULL'
                    else:
                        data[i] = "'" + data[i] + "'"

                sql = """INSERT INTO "MeterRecords" (%s) VALUES (%s)""" % (
                    ','.join(cols), ','.join(data))

                dbUtil.executeSQL(cur, sql)

            lineCnt += 1

    conn.commit()

    msg = ("Processed %s lines.\n" % lineCnt)
    sys.stderr.write(msg)
    msgBody += msg

    notifier.sendNotificationEmail(msgBody)
Example #20
class MSGDataVerifier(object):
    """
    Perform verification procedures related to data integrity.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG')
        self.cursor = MSGDBConnector().connectDB().cursor()
        self.dbUtil = MSGDBUtil()

    def mecoReadingsDupeCount(self):
        """
        Generate counts of MECO dupe readings.
        """

        dupes = 0
        startDate = lambda y, m: '%d-%02d-%02d' % (y, m, 1)
        endDate = lambda y, m: '%d-%02d-%02d' % (
            y, m, calendar.monthrange(y, m)[1])

        for y in YEARS:
            startDates = [startDate(y, m) for m in
                          map(lambda x: x + 1, range(12))]
            endDates = [endDate(y, m) for m in map(lambda x: x + 1, range(12))]

            for start, end in zip(startDates, endDates):
                cnt = self.__mecoReadingsDupeCount(start, end)
                self.logger.log('start: %s, dupe cnt: %s' % (start, cnt),
                                'INFO')
                dupes += cnt

        return dupes


    def __mecoReadingsDupeCount(self, startDate, endDate):
        """

        :param startDate:
        :param endDate:
        :returns: DB row count.
        """

        self.dbUtil.executeSQL(self.cursor, """SELECT "Interval".end_time,
                            "MeterData".meter_name,
                            "Reading".channel
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id = "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time BETWEEN '%s' and '%s'
                     GROUP BY "MeterData".meter_name,
                     "Interval".end_time,
                     "Reading".channel
                     HAVING (COUNT(*) > 1)""" % (startDate, endDate))
        return len(self.cursor.fetchall())


    def egaugeAggregationCount(self):
        """
        There should not be more than 96 15-min interval endpoints within a
        single calendar day for a given sub ID.
        :return:
        """
        pass
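The dupe query in __mecoReadingsDupeCount groups on the (meter_name, end_time, channel) tuple and keeps groups with COUNT(*) > 1. A pure-Python illustration of the same counting rule:

from collections import Counter

readings = [('meter1', '2014-01-01 00:15', '1'),
            ('meter1', '2014-01-01 00:15', '1'),  # duplicate tuple
            ('meter1', '2014-01-01 00:30', '1')]

dupes = [key for key, count in Counter(readings).items() if count > 1]
assert dupes == [('meter1', '2014-01-01 00:15', '1')]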
Example #21
class MSGEgaugeNewDataChecker(object):
    """
    Provide notification of newly loaded MSG eGauge data.

    This uses notification type MSG_EGAUGE_SERVICE.
    """
    def __init__(self):
        """
        Constructor.
        """

        print __name__
        self.logger = SEKLogger(__name__)
        self.connector = MSGDBConnector()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()

    def newDataCount(self):
        """
        Measure the amount of new data that is present since the last time
        new data was reported.
        """

        cursor = self.connector.conn.cursor()
        tableName = 'EgaugeEnergyAutoload'
        lastTime = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if lastTime is None:
            lastTime = '1900-01-01'
        sql = """SELECT COUNT(*) FROM "%s" WHERE datetime > '%s'""" % (
            tableName, lastTime)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return 0
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None

    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: A string indicating the type of the
        notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        cursor = self.connector.conn.cursor()
        sql = """SELECT MAX("notificationTime") FROM "%s" WHERE
        "notificationType" = '%s'""" % (NOTIFICATION_HISTORY_TABLE,
                                        notificationType)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None

    def saveNotificationTime(self):
        """
        Save the notification event to the notification history.
        """

        cursor = self.connector.conn.cursor()
        sql = """INSERT INTO "%s" ("notificationType", "notificationTime")
        VALUES ('MSG_EGAUGE_SERVICE', NOW())""" % NOTIFICATION_HISTORY_TABLE
        success = self.dbUtil.executeSQL(cursor, sql)
        self.connector.conn.commit()
        if not success:
            # @todo Raise an exception.
            self.logger.log(
                'An error occurred while saving the notification time.')

    def sendNewDataNotification(self, testing=False):
        """
        Send a notification reporting on new data available since the last
        time new data was reported.

        :param testing: Use testing mode when True.
        """

        lastReportDate = self.lastReportDate('MSG_EGAUGE_SERVICE')

        if not lastReportDate:
            lastReportDate = "never"

        msgBody = '\nNew MSG eGauge data has been loaded to %s.' % \
                  self.connector.dbName
        msgBody += '\n\n'
        msgBody += 'The new data count is %s readings.' % self.newDataCount()
        msgBody += '\n\n'
        msgBody += 'The last report date was %s.' % lastReportDate
        msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing=testing)
        self.saveNotificationTime()
class MSGWeatherDataUtil(object):
    """
    Utility methods for working with weather data.
    """
    def __init__(self):
        """
        Constructor.

        A database connection is not maintained here to keep this class
        lightweight.
        """

        self.logger = SEKLogger(__name__, DEBUG)
        self.configer = MSGConfiger()
        self.url = self.configer.configOptionValue('Weather Data',
                                                   'weather_data_url')
        self.pattern = self.configer.configOptionValue('Weather Data',
                                                       'weather_data_pattern')
        self.fileList = []
        self.dateList = []  # List of dates corresponding to weather data files.
        self.fillFileListAndDateList()
        self.dbUtil = MSGDBUtil()

    def fillFileListAndDateList(self):
        """
        Return a list of weather files obtained from the remote server used
        in processing weather data.
        """

        response = urllib2.urlopen(self.url).read()

        self.logger.log('Filling file list:', DEBUG)
        for filename in re.findall(self.pattern, response):
            # Only examine first match group in the filename match.
            self.logger.log('filename {}'.format(filename[0]), DEBUG)
            self.fileList.append(filename[0])
            self.dateList.append(self.datePart(filename[0]))

    def datePart(self, filename=None, datetime=None):
        """
        Return the date part of a NOAA weather data filename.

        :param filename: String of the filename.
        :param datetime: datetime object.
        :returns: String of the date part of the given parameter.
        """

        assert filename == None or datetime == None, "One argument is allowed."
        if filename:
            newName = filename.replace("QCLCD", '')
            newName = newName.replace(".zip", '')
            return newName
        if datetime:
            return datetime.strftime('%Y-%m-%d')

    def getLastDateLoaded(self, cursor):
        """
        Return the last date of loaded weather data.

        :param cursor: DB cursor.
        :returns: Last date.
        """

        sql = """select wban, datetime, record_type from "%s"
                 ORDER BY datetime desc limit 1""" % WEATHER_DATA_TABLE

        self.dbUtil.executeSQL(cursor, sql)
        row = cursor.fetchone()
        # self.logger.log('Date last loaded = %s' % row[1], 'info')
        return row[1]

    def getKeepList(self, fileList, cursor):
        """
        The Keep List is the list of filenames of files containing data that are
        *within* the month of the last loaded date or are beyond the last loaded
        date.

        :param fileList: A list of files containing weather data.
        :param cursor: DB cursor.
        :returns: List of weather data filenames to process.
        """

        keepList = []
        i = 0
        for date in fileList:
            self.logger.log('Examining date %s.' % date)

            # The list date should be the last day of the month.
            # It is the date that is compared against the last retrieved date.

            listDate = dt.datetime.strptime(self.datePart(filename=date),
                                            "%Y%m")
            lastDay = calendar.monthrange(listDate.year, listDate.month)[1]
            listDate = dt.datetime.strptime(
                '%s-%s-%s' % (listDate.year, listDate.month, lastDay),
                "%Y-%m-%d")
            self.logger.log('List date = %s.' % listDate)
            lastDate = self.getLastDateLoaded(cursor)

            self.logger.log('last date = %s' % lastDate)

            if lastDate <= listDate:
                keepList.append((i, listDate))

            i += 1

        if keepList:
            keepList.sort()

        return [fileList[d[0]] for d in keepList]
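getKeepList() compares a month-end date derived from each filename against the last loaded date. A worked sketch of that date arithmetic, assuming the QCLCD filename pattern handled by datePart():

import calendar
import datetime as dt

filename = 'QCLCD201306.zip'
datePart = filename.replace('QCLCD', '').replace('.zip', '')  # '201306'
listDate = dt.datetime.strptime(datePart, '%Y%m')             # 2013-06-01
lastDay = calendar.monthrange(listDate.year, listDate.month)[1]
monthEnd = listDate.replace(day=lastDay)                      # 2013-06-30

# The file is kept when the last loaded date is on or before month end.
assert dt.datetime(2013, 6, 15) <= monthEnd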
Example #23
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in a
    respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)

        aggregateNewData(dataType = dataType)

    """
    def __init__(self,
                 exitOnError=True,
                 commitOnEveryInsert=False,
                 testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = 'Aggregation'
        tableList = [
            'irradiance', 'agg_irradiance', 'weather', 'agg_weather',
            'circuit', 'agg_circuit', 'egauge', 'agg_egauge'
        ]
        self.dataParams = {
            'weather': ('agg_weather', 'timestamp', ''),
            'egauge': ('agg_egauge', 'datetime', 'egauge_id'),
            'circuit': ('agg_circuit', 'timestamp', 'circuit'),
            'irradiance': ('agg_irradiance', 'timestamp', 'sensor_id')
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {
            t: self.configer.configOptionValue(section, '{}_table'.format(t))
            for t in tableList
        }

        for t in self.tables.keys():
            self.logger.log('t:{}'.format(t), 'DEBUG')
            try:
                self.columns[t] = self.dbUtil.columnsString(
                    self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log(
                    'Ignoring missing table: Error is {}.'.format(error),
                    'error')

    def existingIntervals(self, aggDataType='', timeColumnName=''):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals.
        """

        return [
            x[0] for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName))
        ]

    def unaggregatedIntervalCount(self,
                                  dataType='',
                                  aggDataType='',
                                  timeColumnName='',
                                  idColumnName=''):
        """
        Return count of unaggregated intervals for a given data type.
        :param dataType:
        :param aggDataType:
        :param timeColumnName:
        :param idColumnName:
        :return: int
        """

        return len(
            self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName,
                                       idColumnName))

    def lastAggregationEndpoint(self, aggDataType='', timeColumnName=''):
        """
        Last aggregation endpoint for a given datatype.

        :param aggDataType:
        :param timeColumnName:
        :return:
        """

        return self.existingIntervals(aggDataType=aggDataType,
                                      timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self,
                              dataType='',
                              aggDataType='',
                              timeColumnName='',
                              idColumnName=''):
        """
        Sorted (ascending) endpoints and their IDs, if available,
        for unaggregated intervals since the last aggregation endpoint for a
        given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        if idColumnName != '':
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = 'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON ' \
                  '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE "{' \
                  '1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, ' \
                  '{3} ASC'

            self.logger.log('last agg endpoint: {}'.format(
                self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            # The id column value is available in the tuple returned by
            # groupby but is not being used here.

            # @todo Exclude last endpoint if it is equal to the last
            # aggregation endpoint.
            #
            # The minute position filtering may be including the last
            # endpoint incorrectly because there are readings occurring
            # within the same minute as the final endpoint, e.g. 23:45:04,
            # 23:45:08, etc.
            #
            # This is not a problem with eGauge data due to reading
            # intervals being every minute at zero seconds.

            return map(lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0), [
                k for k, v in groupby(
                    map(
                        lambda y: y[0].timetuple()[0:5],
                        filter(
                            lambda x: x[0].timetuple()[MINUTE_POSITION] %
                            INTERVAL_DURATION == 0, [(
                                x[0], x[1]) for x in self.rows(
                                    sql.format(
                                        self.tables[dataType],
                                        self.tables[aggDataType],
                                        timeColumnName, idColumnName,
                                        self.lastAggregationEndpoint(
                                            aggDataType, timeColumnName)))])))
            ])
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = 'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}=' \
                  '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' ' \
                  'ORDER BY {2} ASC'

            self.logger.log('last agg endpoint: {}'.format(
                self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            return map(lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0), [
                k for k, v in groupby(
                    map(
                        lambda y: y.timetuple()[0:5],
                        filter(
                            lambda x: x.timetuple()[MINUTE_POSITION] %
                            INTERVAL_DURATION == 0, [(x[0]) for x in self.rows(
                                sql.format(
                                    self.tables[dataType],
                                    self.tables[aggDataType], timeColumnName,
                                    self.lastAggregationEndpoint(
                                        aggDataType, timeColumnName)))])))
            ])
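# A self-contained illustration of the endpoint filtering above, assuming
# MINUTE_POSITION = 4 and INTERVAL_DURATION = 15: keep timestamps whose
# minute falls on a 15-min boundary, truncate to the minute, and dedupe
# consecutive values (the rows arrive sorted ASC), so 23:45:04 and
# 23:45:08 both collapse to 23:45:00.
from datetime import datetime
from itertools import groupby

raw = [datetime(2014, 1, 1, 23, 30, 0),
       datetime(2014, 1, 1, 23, 45, 4),
       datetime(2014, 1, 1, 23, 45, 8),
       datetime(2014, 1, 1, 23, 52, 0)]  # dropped: not on a boundary

onBoundary = [t for t in raw if t.timetuple()[4] % 15 == 0]
endpoints = [datetime(*k) for k, _ in
             groupby(t.timetuple()[0:5] for t in onBoundary)]
assert endpoints == [datetime(2014, 1, 1, 23, 30),
                     datetime(2014, 1, 1, 23, 45)]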

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Intervals are at 0, 15, 30, 45, 60 min.
        The interval size is determined by MECO source data.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        """

        if minute is None:
            raise Exception('Minute not defined.')

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            if (self.nextMinuteCrossing[subkey] <= minute <= last
                    and self.nextMinuteCrossing[subkey] != first):
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log('minute crossed at #1.', 'debug')
                return True
            elif (self.nextMinuteCrossing[subkey] == first
                    and first <= minute <= intervalSize):
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log('minute crossed at #2.', 'debug')
                return True
            return False
        else:
            if (self.nextMinuteCrossingWithoutSubkeys <= minute <= last
                    and self.nextMinuteCrossingWithoutSubkeys != first):
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log('minute crossed at #3.', 'debug')
                return True
            elif (self.nextMinuteCrossingWithoutSubkeys == first
                    and first <= minute <= intervalSize):
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log('minute crossed at #4.', 'debug')
                return True
            return False
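# A standalone sketch of the crossing rule above: the next expected
# boundary advances by 15 each time it is reached and wraps from 60 back
# to 0; a reading in the first interval after the wrap also counts.
def intervalCrossings(minutes, interval=15, last=60):
    nxt = interval
    crossed = []
    for minute in minutes:
        if nxt != 0 and nxt <= minute <= last:
            nxt += interval
            if nxt >= last:
                nxt = 0
            crossed.append(minute)
        elif nxt == 0 and 0 <= minute <= interval:
            nxt = interval
            crossed.append(minute)
    return crossed

assert intervalCrossings([0, 5, 14, 15, 16, 30, 44, 45, 59, 0, 1]) == \
    [15, 30, 45, 0]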

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log('sql: {}'.format(sql), 'debug')
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self,
                dataType='',
                orderBy=None,
                timestampCol='',
                startDate='',
                endDate=''):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderBy = filter(None, orderBy)

        return self.rows("""SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND
        '{}' ORDER BY {}""".format(self.columns[dataType],
                                   self.tables[dataType], timestampCol,
                                   startDate, endDate, ','.join(orderBy)))

    def subkeys(self,
                dataType='',
                timestampCol='',
                subkeyCol='',
                startDate='',
                endDate=''):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0] for sk in self.rows("""SELECT DISTINCT({}) FROM "{}"
        WHERE {} BETWEEN '{}' AND '{}'
            ORDER BY {}""".format(subkeyCol, self.tables[dataType],
                                  timestampCol, startDate, endDate, subkeyCol))
        ]

    def insertAggregatedData(self, agg=None):
        """
        :param agg: MSGAggregatedData
        :return: None
        """

        if not agg.columns:
            raise Exception('agg columns not defined.')
        if not agg.data:
            raise Exception('agg data not defined.')

        self.logger.log('agg data: {}'.format(agg.data))
        self.logger.log('agg data type: {}'.format(type(agg.data)))

        def __insertData(values=''):
            """
            Perform insert of data to the database using the given values.
            :param values: String containing values to be inserted.
            :return: Nothing.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ','.join(agg.columns),
                values)
            self.logger.log('sql: {}'.format(sql), 'debug')
            success = self.dbUtil.executeSQL(self.cursor,
                                             sql,
                                             exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception('Failure during aggregated data insert.')

        for row in agg.data:
            if type(row) == type({}):
                # self.logger.log('row=%s' % row, 'debug')
                # self.logger.log('row type: %s' % type(row))

                for key in row.keys():
                    values = ''
                    valCnt = 0
                    for val in row[key]:
                        if val == 'NULL':
                            values += val
                        elif type(val) == type(''):
                            values += "'" + val.strip() + "'"
                        elif isinstance(val, datetime):
                            values += "'" + val.isoformat() + "'"
                        elif type(val) == type(0):
                            values += str(val)
                        elif type(val) == type(0.0):
                            values += str(val)
                        else:
                            values += val
                        if valCnt < len(agg.columns) - 1:
                            values += ","
                        valCnt += 1
                    __insertData(values=values)

            elif type(row) == type([]):
                values = ''
                valCnt = 0
                for val in row:
                    if val == 'NULL':
                        values += val
                    elif type(val) == type(''):
                        values += "'" + val.strip() + "'"
                    elif isinstance(val, datetime):
                        values += "'" + val.isoformat() + "'"
                    elif type(val) == type(0):
                        values += str(val)
                    elif type(val) == type(0.0):
                        values += str(val)
                    else:
                        values += val
                    if valCnt < len(agg.columns) - 1:
                        values += ","
                    valCnt += 1
                __insertData(values=values)
            else:
                self.logger.log('row = {}'.format(row), 'error')
                raise Exception('Row type not matched.')

        # End for row.
        self.conn.commit()
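
    # For illustration only, with hypothetical values: a list row such as
    # [datetime(2014, 1, 1, 0, 15), 123, 4.5, 'NULL'] is serialized by the
    # loop above (assuming four columns) into the string
    #
    #   '2014-01-01T00:15:00',123,4.5,NULL
    #
    # which __insertData then embeds in
    # INSERT INTO "<agg table>" (...) VALUES( ... ).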

    def intervalAverages(self,
                         sums,
                         cnts,
                         timestamp,
                         timestampIndex,
                         subkeyIndex=None,
                         subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list
        :param cnts: list
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}
        """

        if subkey is not None:
            myAvgs = {}
            reportedAgg = False
            myAvgs[subkey] = []
            sumIndex = 0

            self.logger.log('key: {}'.format(subkey), 'debug')
            # Iterate over sums.
            for s in sums[subkey]:
                if sumIndex == timestampIndex:
                    myAvgs[subkey].append(timestamp)
                elif sumIndex == subkeyIndex:
                    myAvgs[subkey].append(subkey)
                else:
                    if cnts[subkey][sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log(
                                'Aggregating {} rows of data.'.format(
                                    cnts[subkey][sumIndex]), 'debug')
                            reportedAgg = True

                        myAvgs[subkey].append(s / cnts[subkey][sumIndex])
                    else:
                        myAvgs[subkey].append('NULL')
                sumIndex += 1
            return myAvgs
        else:
            myAvgs = []
            reportedAgg = False
            sumIndex = 0
            for s in sums:
                if sumIndex == timestampIndex:
                    myAvgs.append(timestamp)
                else:
                    if cnts[sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log(
                                'Aggregating {} rows of data.'.format(
                                    cnts[sumIndex]), 'debug')
                            reportedAgg = True
                        myAvgs.append(s / cnts[sumIndex])
                    else:
                        myAvgs.append('NULL')
                sumIndex += 1
            return myAvgs
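
    # A worked example for the no-subkey case above, assuming the timestamp
    # occupies index 0: with sums = [<timestamp sum>, 40.0, 8] and
    # cnts = [4, 4, 4], the loop yields [timestamp, 10.0, 2] -- the
    # timestamp passes through unaveraged, each remaining column is
    # sum / count, and a zero count yields 'NULL' instead of dividing.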

    def dataParameters(self, dataType=''):
        """
        Parameters for a given data type.
        :param dataType: string
        :return: (aggType, timeColName, subkeyColName)
        """
        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except (KeyError, AssertionError):
            self.logger.log('Unmatched data type {}.'.format(dataType))

    def aggregateAllData(self, dataType=''):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.
        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName,
                                                  dataType=dataType):
            self.logger.log('start, end: {}, {}'.format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime('%Y-%m-%d %H:%M:%S'),
                endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log('aggData row: {}'.format(row))

    def aggregateNewData(self, dataType=''):
        """
        Convenience method for aggregating new data.

        :param dataType:
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        (end, start) = \
            self.lastUnaggregatedAndAggregatedEndpoints(dataType).items()[0][1]

        self.logger.log(
            'datatype: {}; start, end: {}, {}; end type: {}'.format(
                dataType, start, end, type(end)), 'critical')

        if end is None:
            # No available unaggregated endpoints results in an empty list
            # for type egauge. Other data types can have fractional-minute
            # readings that are not handled completely here, but this method
            # still works for them without problems.
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        if self.incrementEndpoint(start) >= end:
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=self.incrementEndpoint(start).strftime(
                '%Y-%m-%d %H:%M:%S'),
            endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log('aggData row: {}'.format(row))

        self.logger.log('{} rows aggregated for {}.'.format(
            len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.
        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint + a predefined
        amount of minutes.
        """
        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval
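
    # Usage sketch: the increment is a fixed 15 minutes, so, for example,
    #
    #   self.incrementEndpoint(datetime(2014, 1, 31, 23, 59, 59))
    #
    # returns datetime(2014, 2, 1, 0, 14, 59), rolling over the month
    # boundary via relativedelta.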

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=''):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.
        :param dataType:
        :return: dict with tuple.
        """
        self.logger.log('datatype {}'.format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log('subkey colname {}'.format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType,
            aggDataType=aggType,
            timeColumnName=timeColName,
            idColumnName=subkeyColName)

        self.logger.log('unagg endpoints: {}'.format(unAggregatedEndpoints))
        return {
            dataType:
            (unAggregatedEndpoints[-1]
             if unAggregatedEndpoints != [] else None,
             self.lastAggregationEndpoint(aggDataType=aggType,
                                          timeColumnName=timeColName))
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.
        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """
        return {
            x.keys()[0]: (x.values()[0])
            for x in map(self.lastUnaggregatedAndAggregatedEndpoints,
                         [k for k in self.dataParams])
        }

    def monthStartsAndEnds(self, timeColumnName='', dataType=''):
        """
        Return first date and last date for the given **raw** data type for each
        month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log('datatype {}'.format(dataType), 'debug')
        (start,
         end) = self.rows("""SELECT MIN({}), MAX({}) FROM \"{}\"""".format(
             timeColumnName, timeColumnName, self.tables[dataType]))[0]
        self.logger.log('start {}'.format(start))
        self.logger.log('end {}'.format(end))

        # End time needs transforming in split dates to extend the end of the
        # day to 23:59:59.

        splitDates = self.timeUtil.splitDates(start, end)

        startEndDatesTransform = []
        i = 0
        while i < len(splitDates):
            startEndDatesTransform.append(
                (splitDates[i][0],
                 self.incrementEndpoint(
                     datetime(splitDates[i][1].timetuple()[0],
                              splitDates[i][1].timetuple()[1],
                              splitDates[i][1].timetuple()[2], 23, 59, 59))))
            i += 1

        return startEndDatesTransform
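
    # For illustration only, assuming splitDates yields per-month
    # (first date, last date) pairs: raw data spanning mid-January through
    # early February would produce tuples shaped like
    #
    #   [(<2014-01-15 00:00:00>, <2014-02-01 00:14:59>),
    #    (<2014-02-01 00:00:00>, <2014-02-11 00:14:59>)]
    #
    # where each end is first extended to 23:59:59 and then pushed forward
    # one 15-minute aggregation period by incrementEndpoint.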

    def aggregatedData(self,
                       dataType='',
                       aggregationType='',
                       timeColumnName='',
                       subkeyColumnName='',
                       startDate='',
                       endDate=''):
        """
        Provide aggregated data.

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
        ci = lambda col_name: self.columns[dataType].split(',').index(col_name)

        rowCnt = 0

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(dataType=dataType,
                                     timestampCol=timeColumnName,
                                     subkeyCol=subkeyColumnName,
                                     startDate=startDate,
                                     endDate=endDate)

        self.logger.log('subkeys: {}'.format(mySubkeys), 'debug')

        def __initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.
            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                sums = []
                cnts = []
                for i in range(len(self.columns[dataType].split(','))):
                    sums.append(0)
                    cnts.append(0)
            else:
                if not subkey:
                    for i in range(len(self.columns[dataType].split(','))):
                        for k in mySubkeys:
                            if k not in sums.keys():
                                sums[k] = []
                                cnts[k] = []
                            sums[k].append(0)
                            cnts[k].append(0)
                else:
                    sums[subkey] = []
                    for i in range(len(self.columns[dataType].split(','))):
                        sums[subkey].append(0)
                    cnts[subkey] = []
                    for i in range(len(self.columns[dataType].split(','))):
                        cnts[subkey].append(0)

            return (sums, cnts)

        (sum, cnt) = __initSumAndCount()

        def __initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.
            :returns: None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log('subkeys to check: {}'.format(subkeysToCheck),
                            'debug')

            if mySubkeys:
                for row in self.rawData(
                        dataType=dataType,
                        orderBy=[timeColumnName, subkeyColumnName],
                        timestampCol=timeColumnName,
                        startDate=startDate,
                        endDate=endDate):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck != []:
                        if row[ci(subkeyColumnName)] in subkeysToCheck:
                            subkeysToCheck.remove(row[ci(subkeyColumnName)])
                        minute = row[ci(
                            timeColumnName)].timetuple()[MINUTE_POSITION]

                        if minute <= 15:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 15
                        elif minute <= 30:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 30
                        elif minute <= 45:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 45
                        elif minute == 0 or minute <= 59:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 0
                        else:
                            raise Exception(
                                'Unable to determine next minute crossing')
                        self.logger.log(
                            'next min crossing for {} = {}'.format(
                                row[ci(subkeyColumnName)],
                                self.nextMinuteCrossing[row[ci(
                                    subkeyColumnName)]]), 'debug')
                    else:
                        break

            else:
                # Non-subkey case e.g. weather data.
                rowCnt = 0
                # @todo Optimize by querying only the first row.
                for row in self.rawData(dataType=dataType,
                                        orderBy=[timeColumnName],
                                        timestampCol=timeColumnName,
                                        startDate=startDate,
                                        endDate=endDate):
                    minute = row[ci(
                        timeColumnName)].timetuple()[MINUTE_POSITION]
                    if minute <= 15:
                        self.nextMinuteCrossingWithoutSubkeys = 15
                    elif minute <= 30:
                        self.nextMinuteCrossingWithoutSubkeys = 30
                    elif minute <= 45:
                        self.nextMinuteCrossingWithoutSubkeys = 45
                    elif minute == 0 or minute <= 59:
                        self.nextMinuteCrossingWithoutSubkeys = 0
                    else:
                        raise Exception(
                            'Unable to determine next minute crossing')
                    self.logger.log(
                        'next min crossing = {}'.format(
                            self.nextMinuteCrossingWithoutSubkeys), 'debug')
                    rowCnt += 1
                    if rowCnt > 0:
                        break

        __initIntervalCrossings()

        for row in self.rawData(dataType=dataType,
                                orderBy=[timeColumnName, subkeyColumnName],
                                timestampCol=timeColumnName,
                                startDate=startDate,
                                endDate=endDate):

            if mySubkeys:
                for col in self.columns[dataType].split(','):
                    if self.mathUtil.isNumber(
                            row[ci(col)]) and ci(col) != ci(subkeyColumnName):
                        sum[row[ci(subkeyColumnName)]][ci(col)] += row[ci(col)]
                        cnt[row[ci(subkeyColumnName)]][ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute,
                                        subkey=row[ci(subkeyColumnName)]):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log(
                        'key: {}'.format(row[ci(subkeyColumnName)]), 'debug')
                    aggData += [
                        self.intervalAverages(sum, cnt,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName),
                                              ci(subkeyColumnName),
                                              row[ci(subkeyColumnName)])
                    ]
                    self.logger.log('minute crossed {}'.format(minuteCrossed),
                                    'DEBUG')

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    (sum,
                     cnt) = __initSumAndCount(subkey=row[ci(subkeyColumnName)],
                                              sums=sum,
                                              cnts=cnt)
            else:
                for col in self.columns[dataType].split(','):
                    if self.mathUtil.isNumber(row[ci(col)]):
                        sum[ci(col)] += row[ci(col)]
                        cnt[ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [
                        self.intervalAverages(sum, cnt,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName))
                    ]
                    (sum, cnt) = __initSumAndCount(subkey=None,
                                                   sums=sum,
                                                   cnts=cnt)

            rowCnt += 1

        self.logger.log('aggdata = {}'.format(aggData), 'debug')
        return MSGAggregatedData(aggregationType=aggregationType,
                                 columns=self.columns[dataType].split(','),
                                 data=aggData)
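
# Usage sketch for this class, with a hypothetical data type; it assumes a
# configured database connection:
#
#   aggregator = MSGDataAggregator()
#   aggregator.aggregateAllData(dataType = 'irradiance')  # full re-aggregation
#   counts = aggregator.aggregateNewData(dataType = 'irradiance')
#   print counts  # e.g. {'irradiance': 96}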
Example #24
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """

    def __init__(self):
        """
        Constructor.
        """

        self.logger = MSGLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        print "CallableClass.__call__(%s)" % param

    def insertData(self, conn, tableName, columnsAndValues, fKeyVal = None,
                   withoutCommit = 0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicating that the insert
        will not be immediately committed
        :returns: A database cursor.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print "----------" + tableName + "----------"
            print columnDict
            print columnsAndValues

        for col in columnDict.keys():

            # Use DEFAULT as the value for the primary key so that the
            # primary key is obtained from the predefined sequence.
            if col == '_pkey':
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.
                    print 'DEFAULT'
                dbColsAndVals[columnDict[col]] = 'DEFAULT'

            # For the foreign key, set the value from the given parameter.
            elif col == '_fkey':
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.
                    print fKeyVal
                dbColsAndVals[columnDict[col]] = fKeyVal

            else:
                if VISUALIZE_DATA:
                    print columnDict[col], # DB col name.

                # The Register and Reading tables need to handle NULL
                # values as a special case.
                if tableName == 'Register' or tableName == 'Reading':
                    try:
                        if VISUALIZE_DATA:
                            print columnsAndValues[col] # data source value
                        dbColsAndVals[columnDict[col]] = columnsAndValues[col]
                    except:
                        if VISUALIZE_DATA:
                            print 'NULL'
                        dbColsAndVals[columnDict[col]] = 'NULL'

                # For all other cases, simply pass the value.
                else:
                    if VISUALIZE_DATA:
                        print columnsAndValues[col] # data source value
                    dbColsAndVals[columnDict[col]] = columnsAndValues[col]

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for col in dbColsAndVals.keys():
            cols.append(col)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(dbColsAndVals[col])
            else:
                vals.append("'%s'" % dbColsAndVals[
                    col]) # Surround value with single quotes.

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
        tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'debug')

        return cur
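
# For illustration only, with a hypothetical column mapping: the quoting
# rules above leave DEFAULT, NULL and NOW() bare and wrap everything else
# in single quotes, producing SQL roughly like
#
#   INSERT INTO "MeterData" (meter_data_id,energy,created)
#   VALUES (DEFAULT,'1.5',NOW())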
Example #25
class MSGNotifier(object):
    """
    Provides notification service functionality for MSG data processing.

    Email settings are stored in the local configuration.

    Usage:

    from msg_notifier import MSGNotifier
    self.notifier = MSGNotifier()

    Public API:

    sendNotificationEmail(msgBody, testing = False):
        Send msgBody as a notification to the mailing list defined in the
        config file.

    sendMailWithAttachments(msgBody, files = None, testing = False)
        Send msgBody with files attached as a notification to the mailing
        list defined in the config file.

    lastReportDate(noticeType):
        The last date where a notification of the given type was reported.

    recordNotificationEvent(noticeType):
        Record an event in the notification history.
    """


    def __init__(self):
        """
        Constructor.
        """

        warnings.simplefilter('default')
        warnings.warn("This module is deprecated in favor of SEKNotifier.",
                      DeprecationWarning)

        self.config = MSGConfiger()
        self.logger = SEKLogger(__name__, 'info')
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.noticeTable = 'NotificationHistory'
        self.notificationHeader = "This is a message from the Hawaii Smart " \
                                  "Energy Project MSG Project notification " \
                                  "system.\n\n"

        self.noReplyNotice = '\n\nThis email account is not monitored. No ' \
                             'replies will originate from this ' \
                             'account.\n\nYou are receiving this message ' \
                             'because you are on the recipient list for ' \
                             'notifications for the Hawaii Smart Energy ' \
                             'Project.'


    def sendNotificationEmail(self, msgBody, testing = False):
        """
        This method is an alternative to the multipart method in
        sendMailWithAttachments.

        :param msgBody: The body of the message to be sent.
        :param testing: True if running in testing mode.
        :returns: True for success, False for an error.
        """

        errorOccurred = False
        user = self.config.configOptionValue('Notifications', 'email_username')
        password = self.config.configOptionValue('Notifications',
                                                 'email_password')
        fromaddr = self.config.configOptionValue('Notifications',
                                                 'email_from_address')

        if testing:
            toaddr = self.config.configOptionValue('Notifications',
                                                   'testing_email_recipients')
        else:
            toaddr = self.config.configOptionValue('Notifications',
                                                   'email_recipients')
        server = smtplib.SMTP(self.config.configOptionValue('Notifications',
                                                            'smtp_server_and_port'))

        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP STARTTLS: {}".format(detail),
                            'ERROR')

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP login: %s" % detail, 'ERROR')

        senddate = datetime.now().strftime('%Y-%m-%d')
        subject = "HISEP Notification"

        msgHeader = "Date: {}\r\nFrom: {}\r\nTo: {}\r\nSubject: {" \
                    "}\r\nX-Mailer: My-Mail\r\n\r\n".format(senddate, fromaddr,
                                                            toaddr, subject)

        msgBody = self.notificationHeader + msgBody

        msgBody += self.noReplyNotice

        try:
            self.logger.log("Send email notification.", 'INFO')
            server.sendmail(fromaddr, toaddr, msgHeader + msgBody)
            server.quit()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP sendmail: {}".format(detail),
                            'ERROR')

        return not errorOccurred


    def sendMailWithAttachments(self, msgBody, files = None, testing = False):
        """
        Send email along with attachments.

        :param msgBody: String containing the body of the message to send.
        :param files: List of file paths. This is a mutable argument that
        should be handled carefully as the default is defined only once.
        :param testing: True if running in testing mode.
        :returns: True if no exceptions are raised.
        """

        if files is None:
            files = []

        sys.stderr.write("Sending multipart email.\n")
        if testing:
            self.logger.log("Notification testing mode is ON.\n", 'info')

        errorOccurred = False
        assert type(files) == list

        user = self.config.configOptionValue('Notifications', 'email_username')
        password = self.config.configOptionValue('Notifications',
                                                 'email_password')

        if testing:
            send_to = self.config.configOptionValue('Notifications',
                                                    'testing_email_recipients')
        else:
            send_to = self.config.configOptionValue('Notifications',
                                                    'email_recipients')

        send_from = self.config.configOptionValue('Notifications',
                                                  'email_from_address')

        msg = MIMEMultipart()
        msg['From'] = send_from
        msg['To'] = send_to
        msg['Date'] = formatdate(localtime = True)
        msg['Subject'] = "HISEP Notification"

        msg.attach(MIMEText(msgBody))

        for f in files:
            sys.stderr.write("Attaching file %s.\n" % f)
            part = MIMEBase('application', "octet-stream")
            part.set_payload(open(f, "rb").read())
            Encoders.encode_base64(part)
            part.add_header('Content-Disposition',
                            'attachment; filename="%s"' % os.path.basename(f))
            msg.attach(part)

        server = smtplib.SMTP(self.config.configOptionValue('Notifications',
                                                            'smtp_server_and_port'))
        try:
            server.starttls()
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP STARTTLS: %s" % detail,
                            'ERROR')

        try:
            server.login(user, password)
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP login: %s" % detail, 'ERROR')

        self.logger.log("Send email notification.", 'INFO')

        try:
            server.sendmail(send_from, send_to, msg.as_string())
        except smtplib.SMTPException as detail:
            errorOccurred = True
            self.logger.log("Exception during SMTP sendmail: %s" % detail,
                            'ERROR')

        server.quit()

        if not errorOccurred:
            self.logger.log('No exceptions occurred.\n', 'info')

        return not errorOccurred


    def recordNotificationEvent(self, noticeType = None):
        """
        Save a notification event to the notification history.
        :param noticeType: <enum 'MSGNotificationHistoryTypes'>
        :returns: Boolean
        """

        if not noticeType:
            return False
        if noticeType not in MSGNotificationHistoryTypes:
            return False

        cursor = self.cursor
        sql = """INSERT INTO "{}" ("notificationType", "notificationTime")
        VALUES ('{}', NOW())""".format(self.noticeTable, noticeType.name)
        success = self.dbUtil.executeSQL(cursor, sql)
        self.conn.commit()
        if not success:
            raise Exception('Exception while saving the notification time.')
        return success


    def lastReportDate(self, noticeType = None):
        """
        Get the last time a notification was reported for the given
        noticeType.

        :param noticeType: String indicating the type of the
        notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        if not noticeType or noticeType not in MSGNotificationHistoryTypes:
            raise Exception('Invalid notice type.')

        cursor = self.cursor

        sql = 'SELECT MAX("notificationTime") FROM "{}" WHERE ' \
              '"notificationType" = \'{}\''.format(self.noticeTable,
                                                   noticeType.name)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            raise Exception('Exception while getting last report date.')
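
# Usage sketch, assuming configured Notifications settings and a member of
# MSGNotificationHistoryTypes named MSG_EGAUGE_SERVICE:
#
#   notifier = MSGNotifier()
#   if notifier.sendNotificationEmail('New data loaded.', testing = True):
#       notifier.recordNotificationEvent(
#           MSGNotificationHistoryTypes.MSG_EGAUGE_SERVICE)
#   print notifier.lastReportDate(
#       MSGNotificationHistoryTypes.MSG_EGAUGE_SERVICE)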
class MSGEgaugeNewDataChecker(object):
    """
    Provide notification of newly loaded MSG eGauge data.

    This uses notification type MSG_EGAUGE_SERVICE.
    """

    def __init__(self):
        """
        Constructor.
        """

        print __name__
        self.logger = SEKLogger(__name__)
        self.connector = MSGDBConnector()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.configer = MSGConfiger()


    def newDataCount(self):
        """
        Measure the amount of new data that is present since the last time
        new data was reported.
        """

        cursor = self.connector.conn.cursor()
        tableName = 'EgaugeEnergyAutoload'
        lastTime = self.lastReportDate('MSG_EGAUGE_SERVICE')
        if lastTime is None:
            lastTime = '1900-01-01'
        sql = """SELECT COUNT(*) FROM "%s" WHERE datetime > '%s'""" % (
            tableName, lastTime)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return 0
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None


    def lastReportDate(self, notificationType):
        """
        Get the last time a notification was reported.

        :param notificationType: A string indicating the type of the
        notification. It is stored in the event history.
        :returns: datetime of last report date.
        """

        cursor = self.connector.conn.cursor()
        sql = """SELECT MAX("notificationTime") FROM "%s" WHERE
        "notificationType" = '%s'""" % (
            NOTIFICATION_HISTORY_TABLE, notificationType)

        success = self.dbUtil.executeSQL(cursor, sql)
        if success:
            rows = cursor.fetchall()

            if not rows[0][0]:
                return None
            else:
                return rows[0][0]
        else:
            # @todo Raise an exception.
            return None


    def saveNotificationTime(self):
        """
        Save the notification event to the notification history.
        """

        cursor = self.connector.conn.cursor()
        sql = """INSERT INTO "%s" ("notificationType", "notificationTime")
        VALUES ('MSG_EGAUGE_SERVICE', NOW())""" % NOTIFICATION_HISTORY_TABLE
        success = self.dbUtil.executeSQL(cursor, sql)
        self.connector.conn.commit()
        if not success:
            # @todo Raise an exception.
            self.logger.log(
                'An error occurred while saving the notification time.')


    def sendNewDataNotification(self, testing = False):
        """
        Send a notification reporting on new data available since the last
        time new data was reported.

        :param testing: Use testing mode when True.
        """

        lastReportDate = self.lastReportDate('MSG_EGAUGE_SERVICE')

        if not lastReportDate:
            lastReportDate = "never"

        msgBody = '\nNew MSG eGauge data has been loaded to %s.' % self\
            .connector.dbName
        msgBody += '\n\n'
        msgBody += 'The new data count is %s readings.' % self.newDataCount()
        msgBody += '\n\n'
        msgBody += 'The last report date was %s.' % lastReportDate
        msgBody += '\n\n'
        self.notifier.sendNotificationEmail(msgBody, testing = testing)
        self.saveNotificationTime()
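
# Usage sketch: report newly loaded eGauge data if any is present, assuming
# the notification history table and mailing list are configured:
#
#   checker = MSGEgaugeNewDataChecker()
#   if checker.newDataCount() > 0:
#       checker.sendNewDataNotification(testing = True)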
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in a
    respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)

        aggregateNewData(dataType = dataType)

    """

    def __init__(self, exitOnError=True, commitOnEveryInsert=False, testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, "info")
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = "Aggregation"
        tableList = [
            "irradiance",
            "agg_irradiance",
            "weather",
            "agg_weather",
            "circuit",
            "agg_circuit",
            "egauge",
            "agg_egauge",
        ]
        self.dataParams = {
            "weather": ("agg_weather", "timestamp", ""),
            "egauge": ("agg_egauge", "datetime", "egauge_id"),
            "circuit": ("agg_circuit", "timestamp", "circuit"),
            "irradiance": ("agg_irradiance", "timestamp", "sensor_id"),
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {t: self.configer.configOptionValue(section, "{}_table".format(t)) for t in tableList}

        for t in self.tables.keys():
            self.logger.log("t:{}".format(t), "DEBUG")
            try:
                self.columns[t] = self.dbUtil.columnsString(self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log("Ignoring missing table: Error is {}.".format(error), "error")

    def existingIntervals(self, aggDataType="", timeColumnName=""):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals.
        """

        return [
            x[0]
            for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName
                )
            )
        ]

    def unaggregatedIntervalCount(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Return count of unaggregated intervals for a given data type.
        :param dataType:
        :param aggDataType:
        :param timeColumnName:
        :param idColumnName:
        :return: int
        """

        return len(self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName, idColumnName))

    def lastAggregationEndpoint(self, aggDataType="", timeColumnName=""):
        """
        Last aggregation endpoint for a given datatype.

        :param aggDataType: string
        :param timeColumnName: string
        :return: datetime
        """

        return self.existingIntervals(aggDataType=aggDataType, timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Sorted (ascending) endpoints and their IDs, if available,
        for unaggregated intervals since the last aggregation endpoint for a
        given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        if idColumnName != "":
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = (
                'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON '
                '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE "{'
                '1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, '
                "{3} ASC"
            )

            self.logger.log("last agg endpoint: {}".format(self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            # The id column value is available in the tuple returned by
            # groupby but is not being used here.

            # @todo Exclude last endpoint if it is equal to the last
            # aggregation endpoint.
            #
            # The minute position filtering may be including the last
            # endpoint incorrectly because there are readings occurring
            # within the same minute as the final endpoint, e.g. 23:45:04,
            # 23:45:08, etc.
            #
            # This is not a problem with eGauge data due to reading intervals
            # being every minute and zero seconds.

            return map(
                lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0),
                [
                    k
                    for k, v in groupby(
                        map(
                            lambda y: y[0].timetuple()[0:5],
                            filter(
                                lambda x: x[0].timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0,
                                [
                                    (x[0], x[1])
                                    for x in self.rows(
                                        sql.format(
                                            self.tables[dataType],
                                            self.tables[aggDataType],
                                            timeColumnName,
                                            idColumnName,
                                            self.lastAggregationEndpoint(aggDataType, timeColumnName),
                                        )
                                    )
                                ],
                            ),
                        )
                    )
                ],
            )
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = (
                'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}='
                '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' '
                "ORDER BY {2} ASC"
            )

            self.logger.log("last agg endpoint: {}".format(self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            return map(
                lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0),
                [
                    k
                    for k, v in groupby(
                        map(
                            lambda y: y.timetuple()[0:5],
                            filter(
                                lambda x: x.timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0,
                                [
                                    (x[0])
                                    for x in self.rows(
                                        sql.format(
                                            self.tables[dataType],
                                            self.tables[aggDataType],
                                            timeColumnName,
                                            self.lastAggregationEndpoint(aggDataType, timeColumnName),
                                        )
                                    )
                                ],
                            ),
                        )
                    )
                ],
            )

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Intervals are at 0, 15, 30, 45, 60 min.
        The interval size is determined by MECO source data.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        """

        if minute is None:
            raise Exception("Minute not defined.")

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            if (
                minute >= self.nextMinuteCrossing[subkey]
                and minute <= last
                and self.nextMinuteCrossing[subkey] != first
            ):
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log("minute crossed at #1.", "debug")
                return True
            elif self.nextMinuteCrossing[subkey] == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log("minute crossed at #2.", "debug")
                return True
            return False
        else:
            if (
                minute >= self.nextMinuteCrossingWithoutSubkeys
                and minute <= last
                and self.nextMinuteCrossingWithoutSubkeys != first
            ):
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log("minute crossed at #3.", "debug")
                return True
            elif self.nextMinuteCrossingWithoutSubkeys == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log("minute crossed at #4.", "debug")
                return True
            return False

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log("sql: {}".format(sql), "debug")
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self, dataType="", orderBy=None, timestampCol="", startDate="", endDate=""):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderBy = filter(None, orderBy)

        return self.rows(
            """SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND
        '{}' ORDER BY {}""".format(
                self.columns[dataType], self.tables[dataType], timestampCol, startDate, endDate, ",".join(orderBy)
            )
        )

    def subkeys(self, dataType="", timestampCol="", subkeyCol="", startDate="", endDate=""):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0]
            for sk in self.rows(
                """SELECT DISTINCT({}) FROM "{}"
        WHERE {} BETWEEN '{}' AND '{}'
            ORDER BY {}""".format(
                    subkeyCol, self.tables[dataType], timestampCol, startDate, endDate, subkeyCol
                )
            )
        ]

    def insertAggregatedData(self, agg=None):
        """
        :param agg: MSGAggregatedData
        :return: None
        """

        if not agg.columns:
            raise Exception("agg columns not defined.")
        if not agg.data:
            raise Exception("agg data not defined.")

        self.logger.log("agg data: {}".format(agg.data))
        self.logger.log("agg data type: {}".format(type(agg.data)))

        def __insertData(values=""):
            """
            Perform insert of data to the database using the given values.
            :param values: String containing values to be inserted.
            :return: Nothing.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ",".join(agg.columns), values
            )
            self.logger.log("sql: {}".format(sql), "debug")
            success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception("Failure during aggregated data insert.")

        for row in agg.data:
            if type(row) == type({}):
                # self.logger.log('row=%s' % row, 'debug')
                # self.logger.log('row type: %s' % type(row))

                for key in row.keys():
                    values = ""
                    valCnt = 0
                    for val in row[key]:
                        if val == "NULL":
                            values += val
                        elif type(val) == type(""):
                            values += "'" + val.strip() + "'"
                        elif isinstance(val, datetime):
                            values += "'" + val.isoformat() + "'"
                        elif type(val) == type(0):
                            values += str(val)
                        elif type(val) == type(0.0):
                            values += str(val)
                        else:
                            values += val
                        if valCnt < len(agg.columns) - 1:
                            values += ","
                        valCnt += 1
                    __insertData(values=values)

            elif type(row) == type([]):
                values = ""
                valCnt = 0
                for val in row:
                    if val == "NULL":
                        values += val
                    elif type(val) == type(""):
                        values += "'" + val.strip() + "'"
                    elif isinstance(val, datetime):
                        values += "'" + val.isoformat() + "'"
                    elif type(val) == type(0):
                        values += str(val)
                    elif type(val) == type(0.0):
                        values += str(val)
                    else:
                        values += val
                    if valCnt < len(agg.columns) - 1:
                        values += ","
                    valCnt += 1
                __insertData(values=values)
            else:
                self.logger.log("row = {}".format(row), "error")
                raise Exception("Row type not matched.")

        # End for row.
        self.conn.commit()

    def intervalAverages(self, sums, cnts, timestamp, timestampIndex, subkeyIndex=None, subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list
        :param cnts: list
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}
        """

        if subkey is not None:
            myAvgs = {}
            reportedAgg = False
            myAvgs[subkey] = []
            sumIndex = 0

            self.logger.log("key: {}".format(subkey), "debug")
            # Iterate over sums.
            for s in sums[subkey]:
                if sumIndex == timestampIndex:
                    myAvgs[subkey].append(timestamp)
                elif sumIndex == subkeyIndex:
                    myAvgs[subkey].append(subkey)
                else:
                    if cnts[subkey][sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log("Aggregating {} rows of data.".format(cnts[subkey][sumIndex]), "debug")
                            reportedAgg = True

                        myAvgs[subkey].append(s / cnts[subkey][sumIndex])
                    else:
                        myAvgs[subkey].append("NULL")
                sumIndex += 1
            return myAvgs
        else:
            myAvgs = []
            reportedAgg = False
            sumIndex = 0
            for s in sums:
                if sumIndex == timestampIndex:
                    myAvgs.append(timestamp)
                else:
                    if cnts[sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log("Aggregating {} rows of data.".format(cnts[sumIndex]), "debug")
                            reportedAgg = True
                        myAvgs.append(s / cnts[sumIndex])
                    else:
                        myAvgs.append("NULL")
                sumIndex += 1
            return myAvgs
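
    # A minimal sketch (hypothetical values) of the non-subkey branch above:
    # the timestamp slot is passed through, every other column becomes
    # sum / cnt, and a zero count maps to "NULL".
    #
    #   sums = [None, 40.0, 0]   # index 0 is the timestamp slot
    #   cnts = [0, 4, 0]
    #   intervalAverages(sums, cnts, datetime(2014, 1, 1, 0, 15), 0)
    #   # -> [datetime(2014, 1, 1, 0, 15), 10.0, 'NULL']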

    def dataParameters(self, dataType=""):
        """
        Parameters for a given data type.
        :param dataType: string
        :return: (aggType, timeColName, subkeyColName)
        """
        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except (KeyError, AssertionError):
            self.logger.log("Unmatched data type {}.".format(dataType),
                            "error")

    def aggregateAllData(self, dataType=""):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.
        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName, dataType=dataType):
            self.logger.log("start, end: {}, {}".format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime("%Y-%m-%d %H:%M:%S"),
                endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
            )
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log("aggData row: {}".format(row))

    def aggregateNewData(self, dataType=""):
        """
        Convenience method for aggregating new data.

        :param dataType:
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        (end, start) = self.lastUnaggregatedAndAggregatedEndpoints(dataType).items()[0][1]

        self.logger.log(
            "datatype: {}; start, end: {}, {}; end type: {}".format(dataType, start, end, type(end)), "critical"
        )

        if end is None:
            # When there are no unaggregated endpoints, the endpoint list is
            # empty (e.g. for the egauge type) and there is nothing to
            # aggregate. Types with fractional-minute readings are not yet
            # handled completely, but this method still works correctly for
            # them.
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        if self.incrementEndpoint(start) >= end:
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=self.incrementEndpoint(start).strftime("%Y-%m-%d %H:%M:%S"),
            endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
        )
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log("aggData row: {}".format(row))

        self.logger.log("{} rows aggregated for {}.".format(len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.
        :param endpoint: The endpoint to be incremented.
        :return: datetime object equal to the given endpoint plus one
        predefined 15-minute aggregation interval.
        """
        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval
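
    # For example, with the 15-minute interval used here:
    #
    #   incrementEndpoint(datetime(2014, 1, 1, 0, 45))
    #   # -> datetime(2014, 1, 1, 1, 0)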

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=""):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.
        :param dataType:
        :return: dict with tuple.
        """
        self.logger.log("datatype {}".format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log("subkey colname {}".format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType, aggDataType=aggType, timeColumnName=timeColName, idColumnName=subkeyColName
        )

        self.logger.log("unagg endpoints: {}".format(unAggregatedEndpoints))
        return {
            dataType: (
                unAggregatedEndpoints[-1] if unAggregatedEndpoints != [] else None,
                self.lastAggregationEndpoint(aggDataType=aggType, timeColumnName=timeColName),
            )
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.
        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """
        return {
            x.keys()[0]: (x.values()[0])
            for x in map(self.lastUnaggregatedAndAggregatedEndpoints, [k for k in self.dataParams])
        }
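
    # Illustrative (hypothetical) shape of the returned mapping, one entry
    # per configured data type, each tuple being (last unaggregated endpoint,
    # last aggregated endpoint):
    #
    #   {'egauge':  (datetime(2014, 3, 1, 0, 0),  datetime(2014, 2, 28, 23, 45)),
    #    'weather': (datetime(2014, 3, 1, 0, 15), datetime(2014, 2, 28, 23, 45))}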

    def monthStartsAndEnds(self, timeColumnName="", dataType=""):
        """
        Return first date and last date for the given **raw** data type for each
        month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log("datatype {}".format(dataType), "debug")
        (start, end) = self.rows(
            """SELECT MIN({}), MAX({}) FROM \"{}\"""".format(timeColumnName, timeColumnName, self.tables[dataType])
        )[0]
        self.logger.log("start {}".format(start))
        self.logger.log("end {}".format(end))

        # The end time of each split date needs to be transformed to extend
        # the end of the day to 23:59:59.

        splitDates = self.timeUtil.splitDates(start, end)

        startEndDatesTransform = []
        for dates in splitDates:
            monthStart, monthEnd = dates[0], dates[1]
            startEndDatesTransform.append(
                (
                    monthStart,
                    self.incrementEndpoint(
                        datetime(monthEnd.year, monthEnd.month, monthEnd.day,
                                 23, 59, 59)
                    ),
                )
            )

        return startEndDatesTransform

    def aggregatedData(
        self, dataType="", aggregationType="", timeColumnName="", subkeyColumnName="", startDate="", endDate=""
    ):
        """
        Provide aggregated data.

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
        ci = lambda col_name: self.columns[dataType].split(",").index(col_name)

        rowCnt = 0

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(
                dataType=dataType,
                timestampCol=timeColumnName,
                subkeyCol=subkeyColumnName,
                startDate=startDate,
                endDate=endDate,
            )

        self.logger.log("subkeys: {}".format(mySubkeys), "debug")

        def __initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.
            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                sums = []
                cnts = []
                for i in range(len(self.columns[dataType].split(","))):
                    sums.append(0)
                    cnts.append(0)
            else:
                if not subkey:
                    for i in range(len(self.columns[dataType].split(","))):
                        for k in mySubkeys:
                            if k not in sums.keys():
                                sums[k] = []
                                cnts[k] = []
                            sums[k].append(0)
                            cnts[k].append(0)
                else:
                    sums[subkey] = []
                    for i in range(len(self.columns[dataType].split(","))):
                        sums[subkey].append(0)
                    cnts[subkey] = []
                    for i in range(len(self.columns[dataType].split(","))):
                        cnts[subkey].append(0)

            return (sums, cnts)

        (sum, cnt) = __initSumAndCount()

        def __initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.
            :returns: None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log("subkeys to check: {}".format(subkeysToCheck), "debug")

            if mySubkeys:
                for row in self.rawData(
                    dataType=dataType,
                    orderBy=[timeColumnName, subkeyColumnName],
                    timestampCol=timeColumnName,
                    startDate=startDate,
                    endDate=endDate,
                ):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck != []:
                        if row[ci(subkeyColumnName)] in subkeysToCheck:
                            subkeysToCheck.remove(row[ci(subkeyColumnName)])
                        minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                        if minute <= 15:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 15
                        elif minute <= 30:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 30
                        elif minute <= 45:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 45
                        elif minute <= 59:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 0
                        else:
                            raise Exception("Unable to determine next minute crossing")
                        self.logger.log(
                            "next min crossing for {} = {}".format(
                                row[ci(subkeyColumnName)], self.nextMinuteCrossing[row[ci(subkeyColumnName)]]
                            ),
                            "debug",
                        )
                    else:
                        break

            else:
                # Non-subkey case e.g. weather data.
                # @todo Optimize by querying only the first row.
                for row in self.rawData(
                    dataType=dataType,
                    orderBy=[timeColumnName],
                    timestampCol=timeColumnName,
                    startDate=startDate,
                    endDate=endDate,
                ):
                    minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]
                    if minute <= 15:
                        self.nextMinuteCrossingWithoutSubkeys = 15
                    elif minute <= 30:
                        self.nextMinuteCrossingWithoutSubkeys = 30
                    elif minute <= 45:
                        self.nextMinuteCrossingWithoutSubkeys = 45
                    elif minute <= 59:
                        self.nextMinuteCrossingWithoutSubkeys = 0
                    else:
                        raise Exception("Unable to determine next minute crossing")
                    self.logger.log("next min crossing = {}".format(self.nextMinuteCrossingWithoutSubkeys), "debug")
                    # Only the first row is needed.
                    break

        __initIntervalCrossings()

        for row in self.rawData(
            dataType=dataType,
            orderBy=[timeColumnName, subkeyColumnName],
            timestampCol=timeColumnName,
            startDate=startDate,
            endDate=endDate,
        ):

            if mySubkeys:
                for col in self.columns[dataType].split(","):
                    if self.mathUtil.isNumber(row[ci(col)]) and ci(col) != ci(subkeyColumnName):
                        sum[row[ci(subkeyColumnName)]][ci(col)] += row[ci(col)]
                        cnt[row[ci(subkeyColumnName)]][ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute, subkey=row[ci(subkeyColumnName)]):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log("key: {}".format(row[ci(subkeyColumnName)]), "debug")
                    aggData += [
                        self.intervalAverages(
                            sum,
                            cnt,
                            row[ci(timeColumnName)],
                            ci(timeColumnName),
                            ci(subkeyColumnName),
                            row[ci(subkeyColumnName)],
                        )
                    ]
                    self.logger.log("minute crossed {}".format(minuteCrossed), "DEBUG")

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    (sum, cnt) = __initSumAndCount(subkey=row[ci(subkeyColumnName)], sums=sum, cnts=cnt)
            else:
                for col in self.columns[dataType].split(","):
                    if self.mathUtil.isNumber(row[ci(col)]):
                        sum[ci(col)] += row[ci(col)]
                        cnt[ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [self.intervalAverages(sum, cnt, row[ci(timeColumnName)], ci(timeColumnName))]
                    (sum, cnt) = __initSumAndCount(subkey=None, sums=sum, cnts=cnt)

            rowCnt += 1

        self.logger.log("aggdata = {}".format(aggData), "debug")
        return MSGAggregatedData(
            aggregationType=aggregationType, columns=self.columns[dataType].split(","), data=aggData
        )
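
# A minimal usage sketch for the aggregator class above; the class name
# MSGDataAggregator is assumed here (adjust if it differs), and 'egauge' is
# assumed to be one of the configured raw data types:
#
#   aggregator = MSGDataAggregator()
#   aggregator.aggregateNewData(dataType = 'egauge')   # only new endpoints
#   aggregator.aggregateAllData(dataType = 'egauge')   # full history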
Example #28
class MECODBInserter(object):
    """
    Provides methods that perform insertion of MECO data.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mapper = MECOMapper()
        self.dupeChecker = MECODupeChecker()
        self.dbUtil = MSGDBUtil()

    def __call__(self, param):
        print "CallableClass.__call__(%s)" % param

    def insertData(self,
                   conn,
                   tableName,
                   columnsAndValues,
                   fKeyVal=None,
                   withoutCommit=0):
        """
        Given a table name and a dictionary of column names and values,
        insert them to the DB.

        :param conn: database connection
        :param tableName: name of the db table
        :param columnsAndValues: dictionary of columns and values to be
        inserted to the db
        :param (optional) fKeyVal: an explicit foreign key value
        :param (optional) withoutCommit: a flag indicating that the insert
        will not be immediately committed
        :returns: A database cursor.
        """

        cur = conn.cursor()

        # Get a dictionary of mapped (from DB to source data) column names.
        columnDict = self.mapper.getDBColNameDict(tableName)

        dbColsAndVals = {}

        if VISUALIZE_DATA:
            print "----------" + tableName + "----------"
            print columnDict
            print columnsAndValues

        for col in columnDict.keys():

            # Use DEFAULT as the value for the primary key so that the
            # primary key is obtained from the predefined sequence.
            if col == '_pkey':
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.
                    print 'DEFAULT'
                dbColsAndVals[columnDict[col]] = 'DEFAULT'

            # For the foreign key, set the value from the given parameter.
            elif col == '_fkey':
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.
                    print fKeyVal
                dbColsAndVals[columnDict[col]] = fKeyVal

            else:
                if VISUALIZE_DATA:
                    print columnDict[col],  # DB col name.

                # The Register and Reading tables need to handle NULL
                # values as a special case.
                if tableName == 'Register' or tableName == 'Reading':
                    try:
                        if VISUALIZE_DATA:
                            print columnsAndValues[col]  # data source value
                        dbColsAndVals[columnDict[col]] = columnsAndValues[col]
                    except:
                        if VISUALIZE_DATA:
                            print 'NULL'
                        dbColsAndVals[columnDict[col]] = 'NULL'

                # For all other cases, simply pass the value.
                else:
                    if VISUALIZE_DATA:
                        print columnsAndValues[col]  # data source value
                    dbColsAndVals[columnDict[col]] = columnsAndValues[col]

        # Add a creation timestamp to MeterData.
        if tableName == 'MeterData':
            dbColsAndVals['created'] = 'NOW()'

        cols = []
        vals = []
        for col in dbColsAndVals.keys():
            cols.append(col)

            # DEFAULT, NULL and NOW() need to appear without quotes.
            if dbColsAndVals[col] in {'DEFAULT', 'NULL', 'NOW()'}:
                vals.append(dbColsAndVals[col])
            else:
                vals.append(
                    "'%s'" %
                    dbColsAndVals[col])  # Surround value with single quotes.

        sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
            tableName, ','.join(cols), ','.join(vals))

        self.dbUtil.executeSQL(cur, sql)

        if withoutCommit == 0:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'debug')

        return cur
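
# A brief usage sketch, assuming MSGDBConnector is imported here and that the
# source dictionary's keys match the MECOMapper mapping for the table
# (values below are illustrative):
if __name__ == '__main__':
    inserter = MECODBInserter()
    conn = MSGDBConnector(testing = True).connectDB()
    cur = inserter.insertData(conn, 'MeterData',
                              {'MeterName': '100001',
                               'UtilDeviceID': '100001',
                               'MacID': '00:00:00:00:00:00:00:00'})
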
class MSGDBUtilTester(unittest.TestCase):
    """
    Unit tests for MECO DB Utils.
    """

    def setUp(self):
        self.i = MECODBInserter()

        # Connect to the testing database.
        self.connector = MSGDBConnector(testing = True)

        self.conn = self.connector.connectDB()
        self.lastSeqVal = None

        # Does this work having the dictCur be in another class?
        self.dictCur = self.connector.dictCur

        self.cursor = self.conn.cursor()
        self.deleter = MECODBDeleter()
        self.tableName = 'MeterData'
        self.columnName = 'meter_data_id'
        self.configer = MSGConfiger()
        self.logger = MSGLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def testMECODBUtilCanBeInited(self):
        self.assertIsNotNone(self.dbUtil)

    def testLastSequenceNumberIsCorrect(self):
        """
        Test if last sequence ID value is generated correctly. Do this by
        inserting and deleting a DB record.
        """

        # Insert some values.
        sampleDict = {'MeterName': '100001', 'UtilDeviceID': '100001',
                      'MacID': '00:00:00:00:00:00:00:00'}
        self.i.insertData(self.conn, self.tableName, sampleDict)

        self.lastSeqVal = self.dbUtil.getLastSequenceID(self.conn,
                                                        self.tableName,
                                                        self.columnName)
        print "lastSeqVal = %s" % self.lastSeqVal

        sql = """SELECT * FROM "%s" WHERE %s = %s""" % (
        self.tableName, self.columnName, self.lastSeqVal)
        dictCur = self.connector.dictCur
        self.dbUtil.executeSQL(dictCur, sql)
        row = dictCur.fetchone()
        meterDataID = row[self.columnName]
        self.assertEqual(self.lastSeqVal, meterDataID)

    def testGetDBName(self):
        dbName = self.dbUtil.getDBName(self.cursor)[0]
        self.logger.log("DB name is %s" % dbName, 'info')
        self.assertEqual(dbName, "test_meco",
                         "Testing DB name should be set correctly.")


    def testEraseTestingDatabase(self):
        """
        Test that calls to eraseTestMeco() work correctly.
        """

        dbName = self.dbUtil.getDBName(self.cursor)[0]
        self.logger.log("DB name is %s" % dbName, 'info')
        self.assertEqual(dbName, "test_meco",
                         "Testing DB name should be set correctly.")
        self.dbUtil.eraseTestMeco()

        # Check all of the tables for the presence of records.
        for table in self.configer.insertTables:
            sql = """select count(*) from "%s";""" % table
            self.dbUtil.executeSQL(self.dictCur, sql)
            row = self.dictCur.fetchone()
            self.assertEqual(row[0], 0,
                             "No records should be present in the %s table."
                             % table)

    def testColumns(self):
        """
        Test the ability to retrieve the column names from a database.
        """

        print self.dbUtil.columns(self.cursor, 'Event')


    def tearDown(self):
        """
        Delete the record that was inserted.
        """
        if self.lastSeqVal is not None:
            self.deleter.deleteRecord(self.conn, self.tableName,
                                      self.columnName, self.lastSeqVal)

        self.connector.closeDB(self.conn)
class MSGNOAAWeatherDataInserter(object):
    """
    Performs weather data insertion to a database.
    """

    def __init__(self, testing = False):
        """
        Constructor.
        :param testing: True if testing mode is being used.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.dbUtil = MSGDBUtil()
        self.dupeChecker = MSGWeatherDataDupeChecker()

    def insertDataDict(self, conn, tableName, listOfDataDicts, commit = False):
        """
        Given a table name and a list of dictionaries of column names and
        values, insert them to the DB.

        :param conn: A database connection.
        :param tableName: Name of the DB table to be inserted to.
        :param listOfDataDicts: List of dictionaries of columns and values
        to be inserted to the DB.
        :param (optional) commit: A flag indicating that DB transactions
        will be committed.
        :returns: Set of datetimes processed.
        """

        cur = conn.cursor()
        processedDateTimes = set()

        for row in listOfDataDicts:

            # Add a creation timestamp using the SQL function.
            row['created'] = 'NOW()'

            cols = []
            vals = []

            for col in row.keys():
                # Prepare the columns and values for insertion via SQL.

                cols.append(col)
                if row[col] != 'NULL':
                    # Surround each value with single quotes...
                    vals.append("'%s'" % row[col])
                else:
                    # Except for NULL values.
                    vals.append("%s" % row[col])

            sql = """INSERT INTO "%s" (%s) VALUES (%s)""" % (
                tableName, ','.join(cols), ','.join(vals))

            if self.dupeChecker.duplicateExists(cur, row['wban'],
                                                row['datetime'],
                                                row['record_type']):
                self.logger.log("Dupe found, dropping dupe.", 'info')
            else:
                processedDateTimes.add(
                    dt.datetime.strptime(row['datetime'], "%Y-%m-%d %H:%M"))
                if self.dbUtil.executeSQL(cur, sql,
                                          exitOnFail = False) is False:
                    # An error occurred.
                    for col in sorted(row.keys()):
                        print "%s: %s" % (col, row[col])
                    sys.exit(-1)

        if commit:
            try:
                conn.commit()
            except:
                self.logger.log("ERROR: Commit failed.", 'debug')

        return processedDateTimes
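
# A short usage sketch, assuming MSGDBConnector is imported and that
# WEATHER_DATA_TABLE names the configured weather table; the row below is
# hypothetical, and its keys must match the table's column names:
if __name__ == '__main__':
    inserter = MSGNOAAWeatherDataInserter()
    conn = MSGDBConnector().connectDB()
    rows = [{'wban': '22516', 'datetime': '2014-01-01 00:15',
             'record_type': 'AA'}]
    print(inserter.insertDataDict(conn, WEATHER_DATA_TABLE, rows,
                                  commit = True))
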
class MSGWeatherDataUtil(object):
    """
    Utility methods for working with weather data.
    """

    def __init__(self):
        """
        Constructor.

        A database connection is not maintained here to keep this class
        lightweight. This results in the class not having a parameter for
        TESTING MODE.
        """

        self.logger = MSGLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.url = self.configer.configOptionValue('Weather Data',
                                                   'weather_data_url')
        self.pattern = self.configer.configOptionValue('Weather Data',
                                                       'weather_data_pattern')
        self.fileList = []
        self.dateList = [] # List of dates corresponding to the weather data files.
        self.fillFileListAndDateList()
        self.dbUtil = MSGDBUtil()


    def fillFileListAndDateList(self):
        """
        Return a list of weather files obtained from the remote server used
        in processing weather data.
        """

        response = urllib2.urlopen(self.url).read()

        for filename in re.findall(self.pattern, response):
            self.fileList.append(filename[0])
            self.dateList.append(self.datePart(filename = filename[0]))


    def datePart(self, filename = None, datetime = None):
        """
        Return the date part of a NOAA weather data filename.

        :param filename: The filename.
        :param datetime: A datetime object.
        :returns: The date part of the given parameter.
        """

        assert filename is None or datetime is None, \
            "Only one argument may be given."
        if filename:
            newName = filename.replace("QCLCD", '')
            newName = newName.replace(".zip", '')
            return newName
        if datetime:
            return datetime.strftime('%Y-%m-%d')
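
    # For example (illustrative filenames):
    #
    #   datePart(filename = 'QCLCD201306.zip')         # -> '201306'
    #   datePart(datetime = dt.datetime(2013, 6, 30))  # -> '2013-06-30'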

    def getLastDateLoaded(self, cursor):
        """
        Return the last date of loaded weather data.

        :returns: Last date.
        """

        sql = """select wban, datetime, record_type from "%s"
                 ORDER BY datetime desc limit 1""" % WEATHER_DATA_TABLE

        self.dbUtil.executeSQL(cursor, sql)
        row = cursor.fetchone()
        # self.logger.log('Date last loaded = %s' % row[1], 'info')
        return row[1]


    def getKeepList(self, fileList, cursor):
        """
        The keep list is the list of filenames of files containing data
        that are *within* the month of the last loaded date or are beyond
        the last loaded date.

        :param fileList: A list of files containing weather data.
        :param cursor: A DB cursor.
        :returns: List of weather data filenames to process.
        """

        keepList = []
        i = 0
        for date in fileList:
            self.logger.log('Examining file %s.' % date)

            # The list date should be the last day of the month.
            # It is the date that is compared against the last retrieved date.

            listDate = dt.datetime.strptime(self.datePart(filename = date),
                                            "%Y%m")
            lastDay = calendar.monthrange(listDate.year, listDate.month)[1]
            listDate = dt.datetime.strptime(
                '%s-%s-%s' % (listDate.year, listDate.month, lastDay),
                "%Y-%m-%d")
            self.logger.log('List date = %s.' % listDate)
            lastDate = self.getLastDateLoaded(cursor)

            self.logger.log('last date = %s' % lastDate)

            if lastDate <= listDate:
                keepList.append((i, listDate))

            i += 1

        if keepList:
            keepList.sort()

        return [fileList[d[0]] for d in keepList]
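
# A minimal usage sketch, assuming MSGDBConnector is imported: fetch the
# remote file list and keep only files at or beyond the month of the last
# loaded date.
if __name__ == '__main__':
    weatherUtil = MSGWeatherDataUtil()
    conn = MSGDBConnector().connectDB()
    print(weatherUtil.getKeepList(weatherUtil.fileList, conn.cursor()))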
Example #32
class MECODupeChecker(object):
    """
    Check for duplicate data in the database.
    """
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'debug')
        self.mecoConfig = MSGConfiger()
        self.currentReadingID = 0
        self.dbUtil = MSGDBUtil()

    def getLastElement(self, rows):
        """
        Get the last element in a collection.

        Example:
            rows = (element1, element2, element3)
            getLastElement(rows) # return element3

        :param rows: Result rows from a query.
        :return: The last element in the collection.
        """

        for i, var in enumerate(rows):
            if i == len(rows) - 1:
                return var

    def eventBranchDupeExists(self, conn, meterName, eventTime):
        """

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param eventTime: Timestamp of event.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "Event".event_time,
                        "MeterData".meter_data_id,
                        "EventData".event_data_id
                 FROM ( ( "MeterData" JOIN "EventData" ON (
                        ( "MeterData".meter_data_id = "EventData"
                        .meter_data_id ) ) )
                 JOIN "Event" ON ( ( "EventData".event_data_id = "Event"
                 .event_data_id ) ) )
                 WHERE "MeterData".meter_name = '%s'
                 AND "Event".event_time = '%s' """ % (meterName, eventTime)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0

    def registerBranchDupeExists(self,
                                 conn,
                                 meterName,
                                 readTime,
                                 registerNumber,
                                 DEBUG=False):
        """
        Determine if a register branch duplicate exists for a given meter
        name, read time, number tuple.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param readTime: Read time in RegisterRead table.
        :param registerNumber: Corresponds to DB column "number".
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        sql = """SELECT "public"."MeterData".meter_name,
                        "public"."RegisterRead".read_time,
                        "public"."Register"."number"
                 FROM "public"."MeterData"
                 INNER JOIN "public"."RegisterData" ON
                      "public" ."MeterData".meter_data_id = "public"
                      ."RegisterData".meter_data_id
                 INNER JOIN "public"."RegisterRead" ON
                      "public"."RegisterData" .register_data_id = "public"
                      ."RegisterRead".register_data_id
                 INNER JOIN "public"."Tier" ON "public"."RegisterRead"
                 .register_read_id = "public"."Tier" .register_read_id
                 INNER JOIN "public"."Register" ON "public"."Tier".tier_id =
                 "public"."Register".tier_id
                 WHERE "public"."MeterData".meter_name = '%s'
                 AND "public"."RegisterRead".read_time = '%s'
                 AND "public"."Register".number = '%s'
                 """ % (meterName, readTime, registerNumber)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        return len(rows) > 0

    def readingBranchDupeExists(self,
                                conn,
                                meterName,
                                endTime,
                                channel=None,
                                DEBUG=False):
        """
        Duplicate cases:
        1. Tuple (meterID, endTime) exists in the database.
        @DEPRECATED in favor of (2), full meterName-endTime-channel query.

        2. Tuple (meterID, endTime, channel) exists in the database.

        :param conn: Database connection.
        :param meterName: Meter name in MeterData table.
        :param endTime: End time in Interval table.
        :param channel: Required parameter that was previously optional. An
        optional channel is now deprecated.
        :return: True if tuple exists, False if not.
        """

        dbCursor = conn.cursor()

        if DEBUG:
            print "readingBranchDupeExists():"

        if channel != None:
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id,
                            "Reading".channel,
                            "Reading".reading_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     INNER JOIN "Reading" ON "Interval".interval_id = "Reading"
                     .interval_id
                     WHERE "Interval".end_time = '%s' and meter_name = '%s' and
                     channel = '%s'""" % (endTime, meterName, channel)

        else:  # deprecated query
            sql = """SELECT	"Interval".end_time,
                            "MeterData".meter_name,
                            "MeterData".meter_data_id
                     FROM "MeterData"
                     INNER JOIN "IntervalReadData" ON "MeterData"
                     .meter_data_id =
                      "IntervalReadData".meter_data_id
                     INNER JOIN "Interval" ON "IntervalReadData"
                     .interval_read_data_id = "Interval".interval_read_data_id
                     WHERE "Interval".end_time = '%s' and meter_name =
                     '%s'""" % (endTime, meterName)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        if len(rows) > 0:
            assert len(
                rows) < 2, "Dupes should be less than 2, found %s: %s." % (
                    len(rows), rows)

            self.currentReadingID = self.getLastElement(rows[0])
            self.logger.log('Reading ID = %s.' % self.currentReadingID,
                            'silent')

            self.logger.log(
                "Duplicate found for meter %s, end time %s, channel %s." %
                (meterName, endTime, channel), 'silent')
            return True

        else:
            self.logger.log(
                "Found no rows for meter %s, end time %s, channel %s." %
                (meterName, endTime, channel), 'silent')
            return False

    def readingValuesAreInTheDatabase(self, conn, readingDataDict):
        """
        Given a reading ID, verify that the associated values are present
        in the database.

        Values are from the columns:
            1. channel
            2. raw_value
            3. uom
            4. value

        :param readingDataDict: Dictionary containing reading values.
        :return: True if the existing values are the same, otherwise False.
        """

        if self.currentReadingID == 0:
            return False

        dbCursor = conn.cursor()

        sql = """SELECT "Reading".reading_id,
                                "Reading".channel,
                                "Reading".raw_value,
                                "Reading".uom,
                                "Reading"."value"
                         FROM "Reading"
                         WHERE "Reading".reading_id = %s""" % (
            self.currentReadingID)

        self.dbUtil.executeSQL(dbCursor, sql)
        rows = dbCursor.fetchall()

        assert len(
            rows) == 1, "Didn't find a matching reading for reading ID %s." %\
                        self.currentReadingID
        if len(rows) == 1:
            self.logger.log("Found %s existing matches." % len(rows), 'silent')

            allEqual = True
            if int(readingDataDict['Channel']) == int(rows[0][1]):
                print "channel equal,"
            else:
                self.logger.log(
                    "channel not equal: %s,%s,%s" %
                    (int(readingDataDict['Channel']), int(
                        rows[0][1]), readingDataDict['Channel'] == rows[0][1]),
                    'debug')
                allEqual = False

            if int(readingDataDict['RawValue']) == int(rows[0][2]):
                print "raw value equal,"
            else:
                self.logger.log(
                    "rawvalue not equal: %s,%s,%s" %
                    (int(readingDataDict['RawValue']), int(rows[0][2]),
                     readingDataDict['RawValue'] == rows[0][2]), 'debug')
                allEqual = False

            if readingDataDict['UOM'] == rows[0][3]:
                print "uom equal,"
            else:
                self.logger.log(
                    "uom not equal: %s,%s,%s" %
                    (readingDataDict['UOM'], rows[0][3], readingDataDict['UOM']
                     == rows[0][3]), 'debug')
                allEqual = False

            if self.approximatelyEqual(float(readingDataDict['Value']),
                                       float(rows[0][4]), 0.001):
                self.logger.log("value equal", 'silent')
            else:
                self.logger.log(
                    "value not equal: %s,%s,%s" %
                    (float(readingDataDict['Value']), float(
                        rows[0][4]), readingDataDict['Value'] == rows[0][4]),
                    'debug')
                allEqual = False

            return allEqual
        else:
            return False

    def approximatelyEqual(self, a, b, tolerance):
        """
        Determine whether two floats are equal within the given tolerance.
        """
        return abs(a - b) < tolerance
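
# A short usage sketch, assuming MSGDBConnector is imported; the meter name,
# end time, and channel below are hypothetical:
if __name__ == '__main__':
    checker = MECODupeChecker()
    conn = MSGDBConnector(testing = True).connectDB()
    if checker.readingBranchDupeExists(conn, '100001',
                                       '2014-01-01 00:15', channel = '1'):
        print("duplicate reading found")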
Example #33
class MSGDBUtilTester(unittest.TestCase):
    """
    Unit tests for MECO DB Utils.
    """
    def setUp(self):
        self.i = MECODBInserter()

        # Connect to the testing database.
        self.connector = MSGDBConnector(testing=True)

        self.conn = self.connector.connectDB()
        self.lastSeqVal = None

        # Does this work having the dictCur be in another class?
        self.dictCur = self.connector.dictCur

        self.cursor = self.conn.cursor()
        self.deleter = MECODBDeleter()
        self.tableName = 'MeterData'
        self.columnName = 'meter_data_id'
        self.configer = MSGConfiger()
        self.logger = SEKLogger(__name__, 'debug')
        self.dbUtil = MSGDBUtil()

    def testMECODBUtilCanBeInited(self):
        self.assertIsNotNone(self.dbUtil)

    def testLastSequenceNumberIsCorrect(self):
        """
        Test if last sequence ID value is generated correctly. Do this by
        inserting and deleting a DB record.
        """

        # Insert some values.
        sampleDict = {
            'MeterName': '100001',
            'UtilDeviceID': '100001',
            'MacID': '00:00:00:00:00:00:00:00'
        }
        self.i.insertData(self.conn, self.tableName, sampleDict)

        self.lastSeqVal = self.dbUtil.getLastSequenceID(
            self.conn, self.tableName, self.columnName)
        print "lastSeqVal = %s" % self.lastSeqVal

        sql = """SELECT * FROM "%s" WHERE %s = %s""" % (
            self.tableName, self.columnName, self.lastSeqVal)
        dictCur = self.connector.dictCur
        self.dbUtil.executeSQL(dictCur, sql)
        row = dictCur.fetchone()
        meterDataID = row[self.columnName]
        self.assertEqual(self.lastSeqVal, meterDataID)

    def testGetDBName(self):
        dbName = self.dbUtil.getDBName(self.cursor)[0]
        self.logger.log("DB name is %s" % dbName, 'info')
        self.assertEqual(dbName, "test_meco",
                         "Testing DB name should be set correctly.")

    def testEraseTestingDatabase(self):
        """
        Test that calls to eraseTestMeco() work correctly.
        """

        dbName = self.dbUtil.getDBName(self.cursor)[0]
        self.logger.log("DB name is %s" % dbName, 'info')
        self.assertEqual(dbName, "test_meco",
                         "Testing DB name should be set correctly.")
        self.dbUtil.eraseTestMeco()

        # Check all of the tables for the presence of records.
        for table in self.configer.insertTables:
            sql = """select count(*) from "%s";""" % table
            self.dbUtil.executeSQL(self.dictCur, sql)
            row = self.dictCur.fetchone()
            self.assertEqual(
                row[0], 0,
                "No records should be present in the %s table." % table)

    def testColumns(self):
        """
        Test the ability to retrieve the column names from a database.
        """

        print self.dbUtil.columns(self.cursor, 'Event')

    def tearDown(self):
        """
        Delete the record that was inserted.
        """
        if self.lastSeqVal is not None:
            self.deleter.deleteRecord(self.conn, self.tableName,
                                      self.columnName, self.lastSeqVal)

        self.connector.closeDB(self.conn)