def __init__(self,
                 exitOnError=True,
                 commitOnEveryInsert=False,
                 testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = 'Aggregation'
        tableList = [
            'irradiance', 'agg_irradiance', 'weather', 'agg_weather',
            'circuit', 'agg_circuit', 'egauge', 'agg_egauge'
        ]
        self.dataParams = {
            'weather': ('agg_weather', 'timestamp', ''),
            'egauge': ('agg_egauge', 'datetime', 'egauge_id'),
            'circuit': ('agg_circuit', 'timestamp', 'circuit'),
            'irradiance': ('agg_irradiance', 'timestamp', 'sensor_id')
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {
            t: self.configer.configOptionValue(section, '{}_table'.format(t))
            for t in tableList
        }

        for t in self.tables.keys():
            self.logger.log('t:{}'.format(t), 'DEBUG')
            try:
                self.columns[t] = self.dbUtil.columnsString(
                    self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log(
                    'Ignoring missing table: Error is {}.'.format(error),
                    'error')
Example #2
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG', useColor=False)
        self.timeUtil = MSGTimeUtil()
        self.configer = MSGConfiger()
        self.fileUtil = MSGFileUtil()
        self.pythonUtil = MSGPythonUtil()  # for debugging
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = SEKNotifier(
            connector=self.connector,
            dbUtil=self.dbUtil,
            user=self.configer.configOptionValue('Notifications',
                                                 'email_username'),
            password=self.configer.configOptionValue('Notifications',
                                                     'email_password'),
            fromaddr=self.configer.configOptionValue('Notifications',
                                                     'email_from_address'),
            toaddr=self.configer.configOptionValue('Notifications',
                                                   'email_recipients'),
            testing_toaddr=self.configer.configOptionValue(
                'Notifications', 'testing_email_recipients'),
            smtp_server_and_port=self.configer.configOptionValue(
                'Notifications', 'smtp_server_and_port'))

        # Google Drive parameters.
        self.clientID = self.configer.configOptionValue(
            'Export', 'google_api_client_id')
        self.clientSecret = self.configer.configOptionValue(
            'Export', 'google_api_client_secret')
        self.oauthScope = 'https://www.googleapis.com/auth/drive'
        self.oauthConsent = 'urn:ietf:wg:oauth:2.0:oob'
        self.googleAPICredentials = ''
        self.exportTempWorkPath = self.configer.configOptionValue(
            'Export', 'db_export_work_path')

        self.credentialPath = self.configer.configOptionValue(
            'Export', 'google_api_credentials_path')
        self.credentialStorage = Storage('{}/google_api_credentials'.format(
            self.credentialPath))

        self._driveService = None
        self._cloudFiles = None
        self.postAgent = 'Maui Smart Grid 1.0.0 DB Exporter'
        self.retryDelay = 10
        self.availableFilesURL = ''
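
# A minimal sketch (not part of the original listing) of acquiring the Google
# API credentials that the exporter above expects to find in credentialStorage;
# the driveService property later reads them back with storage.get(). It
# assumes oauth2client's OAuth2WebServerFlow and an interactive copy/paste of
# the authorization code; the project may obtain its credentials differently.
from oauth2client.client import OAuth2WebServerFlow

exporter = MSGDBExporter()
flow = OAuth2WebServerFlow(client_id=exporter.clientID,
                           client_secret=exporter.clientSecret,
                           scope=exporter.oauthScope,
                           redirect_uri=exporter.oauthConsent)
print 'Visit this URL to authorize: %s' % flow.step1_get_authorize_url()
code = raw_input('Enter the authorization code: ')
exporter.credentialStorage.put(flow.step2_exchange(code))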
Example #3
    def setUp(self):
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.configer = MSGConfiger()
        self.exporter = MSGDBExporter()
        self.testDir = 'db_exporter_test'
        self.uncompressedTestFilename = 'meco_v3_test_data.sql'
        self.compressedTestFilename = 'meco_v3_test_data.sql.gz'
        self.exportTestDataPath = self.configer.configOptionValue(
            'Testing', 'export_test_data_path')
        self.fileUtil = MSGFileUtil()
        self.fileChunks = []
        self.testDataFileID = ''
        self.pyUtil = MSGPythonUtil()
        self.timeUtil = MSGTimeUtil()

        conn = None
        try:
            conn = MSGDBConnector().connectDB()
        except Exception as detail:
            self.logger.log("Exception occurred: {}".format(detail), 'error')
            exit(-1)

        self.logger.log("conn = {}".format(conn), 'debug')
        self.assertIsNotNone(conn)

        # Create a temporary working directory.
        try:
            os.mkdir(self.testDir)
        except OSError as detail:
            self.logger.log(
                'Exception during creation of temp directory: %s' % detail,
                'ERROR')
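
    # A hedged counterpart to the setup above (not shown on the original
    # listing): remove the temporary working directory after each test.
    # Assumes `import shutil` at module level.
    def tearDown(self):
        try:
            shutil.rmtree(self.testDir)
        except OSError as detail:
            self.logger.log(
                'Exception during removal of temp directory: %s' % detail,
                'ERROR')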
Example #4
class MSGTimeUtilTester(unittest.TestCase):
    def setUp(self):
        self.logger = SEKLogger(__name__, 'debug')
        self.timeUtil = MSGTimeUtil()

    def test_concise_now(self):
        conciseNow = self.timeUtil.conciseNow()
        self.logger.log(conciseNow)
        pattern = r'\d+-\d+-\d+_\d+'
        result = re.match(pattern, conciseNow)
        self.assertTrue(result is not None,
                        "Concise now matches the regex pattern.")

    def test_split_dates(self):
        start = dt(2014, 01, 07)
        end = dt(2014, 04, 04)
        print self.timeUtil.splitDates(start, end)
        self.assertEqual(len(self.timeUtil.splitDates(start, end)), 4,
                         'Unexpected date count.')
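
    # splitDates() is consumed later on this page by
    # MSGDataAggregator.monthStartsAndEnds(), which indexes each element as a
    # (start, end) pair; the four chunks asserted above presumably correspond
    # to the calendar months Jan-Apr 2014.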
    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG', useColor = False)
        self.timeUtil = MSGTimeUtil()
        self.configer = MSGConfiger()
        self.fileUtil = MSGFileUtil()
        self.pythonUtil = MSGPythonUtil()  # for debugging
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = SEKNotifier(connector = self.connector,
                                    dbUtil = self.dbUtil,
                                    user = self.configer.configOptionValue(
                                        'Notifications', 'email_username'),
                                    password = self.configer.configOptionValue(
                                        'Notifications', 'email_password'),
                                    fromaddr = self.configer.configOptionValue(
                                        'Notifications', 'email_from_address'),
                                    toaddr = self.configer.configOptionValue(
                                        'Notifications', 'email_recipients'),
                                    testing_toaddr =
                                    self.configer.configOptionValue(
                                        'Notifications',
                                        'testing_email_recipients'),
                                    smtp_server_and_port =
                                    self.configer.configOptionValue(
                                        'Notifications',
                                        'smtp_server_and_port'))

        # Google Drive parameters.
        self.clientID = self.configer.configOptionValue('Export',
                                                        'google_api_client_id')
        self.clientSecret = self.configer.configOptionValue('Export',
                                                            'google_api_client_secret')
        self.oauthScope = 'https://www.googleapis.com/auth/drive'
        self.oauthConsent = 'urn:ietf:wg:oauth:2.0:oob'
        self.googleAPICredentials = ''
        self.exportTempWorkPath = self.configer.configOptionValue('Export',
                                                                  'db_export_work_path')

        self.credentialPath = self.configer.configOptionValue('Export',
                                                              'google_api_credentials_path')
        self.credentialStorage = Storage(
            '{}/google_api_credentials'.format(self.credentialPath))

        self._driveService = None
        self._cloudFiles = None
        self.postAgent = 'Maui Smart Grid 1.0.0 DB Exporter'
        self.retryDelay = 10
        self.availableFilesURL = ''
    def __init__(self, exitOnError=True, commitOnEveryInsert=False, testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, "info")
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = "Aggregation"
        tableList = [
            "irradiance",
            "agg_irradiance",
            "weather",
            "agg_weather",
            "circuit",
            "agg_circuit",
            "egauge",
            "agg_egauge",
        ]
        self.dataParams = {
            "weather": ("agg_weather", "timestamp", ""),
            "egauge": ("agg_egauge", "datetime", "egauge_id"),
            "circuit": ("agg_circuit", "timestamp", "circuit"),
            "irradiance": ("agg_irradiance", "timestamp", "sensor_id"),
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {t: self.configer.configOptionValue(section, "{}_table".format(t)) for t in tableList}

        for t in self.tables.keys():
            self.logger.log("t:{}".format(t), "DEBUG")
            try:
                self.columns[t] = self.dbUtil.columnsString(self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log("Ignoring missing table: Error is {}.".format(error), "error")
    def __init__(self, exitOnError = True, commitOnEveryInsert = False,
                 testing = False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = MSGLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = 'Aggregation'
        tableList = ['irradiance', 'agg_irradiance', 'weather', 'agg_weather',
                     'circuit', 'agg_circuit', 'egauge', 'agg_egauge']
        self.dataParams = {'weather': ('agg_weather', 'timestamp', ''),
                           'egauge': ('agg_egauge', 'datetime', 'egauge_id'),
                           'circuit': ('agg_circuit', 'timestamp', 'circuit'),
                           'irradiance': (
                               'agg_irradiance', 'timestamp', 'sensor_id')}
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {
            t: self.configer.configOptionValue(section, '%s_table' % t) for t in
            tableList}

        for t in self.tables.keys():
            self.logger.log('t:%s' % t, 'DEBUG')
            try:
                self.columns[t] = self.dbUtil.columnsString(self.cursor,
                                                            self.tables[t])
            except TypeError as error:
                self.logger.log('Ignoring missing table: Error is %s.' % error,
                                'error')
    Save retrieval results stored in a global string.
    """

    global MSG_BODY
    global WEATHER_DATA_PATH
    fp = open("%s/retrieval-results.txt" % WEATHER_DATA_PATH, "wb")
    fp.write(MSG_BODY)
    fp.close()


if __name__ == "__main__":

    dbConnector = MSGDBConnector()
    cursor = dbConnector.conn.cursor()
    weatherUtil = MSGWeatherDataUtil()
    timeUtil = MSGTimeUtil()

    msg = "Downloading NOAA weather data (%s)." % timeUtil.conciseNow()
    print msg
    MSG_BODY = "%s\n" % msg

    msg = "Last loaded date is %s." % weatherUtil.datePart(datetime=weatherUtil.getLastDateLoaded(cursor))
    print msg
    MSG_BODY += "%s\n" % msg

    retriever = MSGWeatherDataRetriever()
    configer = MSGConfiger()
    WEATHER_DATA_PATH = configer.configOptionValue("Weather Data", "weather_data_path")

    msg = "Using URL %s." % weatherDataURL
    print msg
Example #9
from apiclient import errors
from msg_configer import MSGConfiger
import os
import shutil
import gzip
from msg_file_util import MSGFileUtil
from msg_db_connector import MSGDBConnector
from msg_db_util import MSGDBUtil
import re
from msg_python_util import MSGPythonUtil
import itertools
import time
from msg_time_util import MSGTimeUtil
from msg_types import MSGNotificationHistoryTypes

EARLIEST_DATE = MSGTimeUtil().datetimeForString('2011-01-01 00:00')


class MSGDBExporterTester(unittest.TestCase):
    """
    Unit tests for the MSG Cloud Exporter.
    """
    def setUp(self):
        self.logger = SEKLogger(__name__, 'DEBUG')
        self.configer = MSGConfiger()
        self.exporter = MSGDBExporter()
        self.testDir = 'db_exporter_test'
        self.uncompressedTestFilename = 'meco_v3_test_data.sql'
        self.compressedTestFilename = 'meco_v3_test_data.sql.gz'
        self.exportTestDataPath = self.configer.configOptionValue(
            'Testing', 'export_test_data_path')
Example #10
    def setUp(self):
        self.logger = SEKLogger(__name__, 'debug')
        self.timeUtil = MSGTimeUtil()
class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in memory and saved to the DB. The time range is
    delimited by a start date and an end date, both of which are included in
    the range. The timestamp of each aggregation interval is the last
    timestamp in that interval's series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at a future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)

        aggregateNewData(dataType = dataType)

    """
    def __init__(self,
                 exitOnError=True,
                 commitOnEveryInsert=False,
                 testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, 'info')
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = 'Aggregation'
        tableList = [
            'irradiance', 'agg_irradiance', 'weather', 'agg_weather',
            'circuit', 'agg_circuit', 'egauge', 'agg_egauge'
        ]
        self.dataParams = {
            'weather': ('agg_weather', 'timestamp', ''),
            'egauge': ('agg_egauge', 'datetime', 'egauge_id'),
            'circuit': ('agg_circuit', 'timestamp', 'circuit'),
            'irradiance': ('agg_irradiance', 'timestamp', 'sensor_id')
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {
            t: self.configer.configOptionValue(section, '{}_table'.format(t))
            for t in tableList
        }
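        # Each data type t is resolved through the config option named
        # '<t>_table' (e.g. 'egauge_table', 'agg_egauge_table'), so table
        # names can be changed in the configuration without touching code.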

        for t in self.tables.keys():
            self.logger.log('t:{}'.format(t), 'DEBUG')
            try:
                self.columns[t] = self.dbUtil.columnsString(
                    self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log(
                    'Ignoring missing table: Error is {}.'.format(error),
                    'error')

    def existingIntervals(self, aggDataType='', timeColumnName=''):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals.
        """

        return [
            x[0] for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName))
        ]

    def unaggregatedIntervalCount(self,
                                  dataType='',
                                  aggDataType='',
                                  timeColumnName='',
                                  idColumnName=''):
        """
        Return count of unaggregated intervals for a given data type.
        :param dataType:
        :param aggDataType:
        :param timeColumnName:
        :param idColumnName:
        :return: int
        """

        return len(
            self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName,
                                       idColumnName))

    def lastAggregationEndpoint(self, aggDataType='', timeColumnName=''):
        """
        Last aggregation endpoint for a given aggregated data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: The most recent aggregation endpoint as a datetime.
        """

        return self.existingIntervals(aggDataType=aggDataType,
                                      timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self,
                              dataType='',
                              aggDataType='',
                              timeColumnName='',
                              idColumnName=''):
        """
        Sorted (ascending) endpoints and their IDs, if available,
        for unaggregated intervals since the last aggregation endpoint for a
        given data type.

        Known issue: an endpoint such as 23:45:04 is returned as 23:45:00,
        which makes the return value imprecise for raw data types with
        sub-minute readings (circuit, irradiance and weather). Aggregation
        itself is not affected; only the reported endpoint value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        if idColumnName != '':
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = 'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON ' \
                  '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE "{' \
                  '1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, ' \
                  '{3} ASC'

            self.logger.log('last agg endpoint: {}'.format(
                self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            # The id column value is available in the tuple returned by
            # groupby but is not being used here.

            # @todo Exclude last endpoint if it is equal to the last
            # aggregation endpoint.
            #
            # The minute position filtering may be including the last
            # endpoint incorrectly because there are readings occurring
            # within the same minute as the final endpoint, e.g. 23:45:04,
            # 23:45:08, etc.
            #
            # This is not a problem with eGauge data because its readings
            # occur every minute, at zero seconds.
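
            # Pipeline, reading from the innermost call outward: (1) fetch
            # (timestamp, id) rows newer than the last aggregation endpoint,
            # (2) keep rows whose minute falls on an INTERVAL_DURATION
            # boundary, (3) reduce each timestamp to (year, month, day, hour,
            # minute), (4) use groupby to collapse duplicates within the same
            # minute, and (5) rebuild datetimes with seconds forced to zero.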

            return map(lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0), [
                k for k, v in groupby(
                    map(
                        lambda y: y[0].timetuple()[0:5],
                        filter(
                            lambda x: x[0].timetuple()[MINUTE_POSITION] %
                            INTERVAL_DURATION == 0, [(
                                x[0], x[1]) for x in self.rows(
                                    sql.format(
                                        self.tables[dataType],
                                        self.tables[aggDataType],
                                        timeColumnName, idColumnName,
                                        self.lastAggregationEndpoint(
                                            aggDataType, timeColumnName)))])))
            ])
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = 'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}=' \
                  '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' ' \
                  'ORDER BY {2} ASC'

            self.logger.log('last agg endpoint: {}'.format(
                self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            return map(lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0), [
                k for k, v in groupby(
                    map(
                        lambda y: y.timetuple()[0:5],
                        filter(
                            lambda x: x.timetuple()[MINUTE_POSITION] %
                            INTERVAL_DURATION == 0, [(x[0]) for x in self.rows(
                                sql.format(
                                    self.tables[dataType],
                                    self.tables[aggDataType], timeColumnName,
                                    self.lastAggregationEndpoint(
                                        aggDataType, timeColumnName)))])))
            ])

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Crossings occur at minutes 0, 15, 30
        and 45. The interval size is determined by MECO source data.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        """

        if minute is None:
            raise Exception('Minute not defined.')

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            if minute >= self.nextMinuteCrossing[subkey] and minute <= last \
                    and \
                            self.nextMinuteCrossing[subkey] != first:
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log('minute crossed at #1.', 'debug')
                return True
            elif self.nextMinuteCrossing[
                    subkey] == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log('minute crossed at #2.', 'debug')
                return True
            return False
        else:
            if minute >= self.nextMinuteCrossingWithoutSubkeys and minute <= \
                    last and self.nextMinuteCrossingWithoutSubkeys != first:
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log('minute crossed at #3.', 'debug')
                return True
            elif self.nextMinuteCrossingWithoutSubkeys == first and minute >=\
                    first and minute <= intervalSize:
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log('minute crossed at #4.', 'debug')
                return True
            return False

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log('sql: {}'.format(sql), 'debug')
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self,
                dataType='',
                orderBy=None,
                timestampCol='',
                startDate='',
                endDate=''):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderBy = filter(None, orderBy)

        return self.rows("""SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND
        '{}' ORDER BY {}""".format(self.columns[dataType],
                                   self.tables[dataType], timestampCol,
                                   startDate, endDate, ','.join(orderBy)))

    def subkeys(self,
                dataType='',
                timestampCol='',
                subkeyCol='',
                startDate='',
                endDate=''):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0] for sk in self.rows("""SELECT DISTINCT({}) FROM "{}"
        WHERE {} BETWEEN '{}' AND '{}'
            ORDER BY {}""".format(subkeyCol, self.tables[dataType],
                                  timestampCol, startDate, endDate, subkeyCol))
        ]

    def insertAggregatedData(self, agg=None):
        """
        :param agg: MSGAggregatedData
        :return: None
        """

        if not agg.columns:
            raise Exception('agg columns not defined.')
        if not agg.data:
            raise Exception('agg data not defined.')

        self.logger.log('agg data: {}'.format(agg.data))
        self.logger.log('agg data type: {}'.format(type(agg.data)))

        def __insertData(values=''):
            """
            Perform insert of data to the database using the given values.
            :param values: String containing values to be inserted.
            :return Nothing.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ','.join(agg.columns),
                values)
            self.logger.log('sql: {}'.format(sql), 'debug')
            success = self.dbUtil.executeSQL(self.cursor,
                                             sql,
                                             exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception('Failure during aggregated data insert.')

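        # Each aggregated row is either a dict of {subkey: [values]} (the
        # subkey case of intervalAverages()) or a flat list of values (the
        # non-subkey case); both shapes are serialized into a SQL VALUES
        # clause below.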
        for row in agg.data:
            if type(row) == type({}):
                # self.logger.log('row=%s' % row, 'debug')
                # self.logger.log('row type: %s' % type(row))

                for key in row.keys():
                    values = ''
                    valCnt = 0
                    for val in row[key]:
                        if val == 'NULL':
                            values += val
                        elif type(val) == type(''):
                            values += "'" + val.strip() + "'"
                        elif isinstance(val, datetime):
                            values += "'" + val.isoformat() + "'"
                        elif type(val) == type(0):
                            values += str(val)
                        elif type(val) == type(0.0):
                            values += str(val)
                        else:
                            values += val
                        if valCnt < len(agg.columns) - 1:
                            values += ","
                        valCnt += 1
                    __insertData(values=values)

            elif type(row) == type([]):
                values = ''
                valCnt = 0
                for val in row:
                    if val == 'NULL':
                        values += val
                    elif type(val) == type(''):
                        values += "'" + val.strip() + "'"
                    elif isinstance(val, datetime):
                        values += "'" + val.isoformat() + "'"
                    elif type(val) == type(0):
                        values += str(val)
                    elif type(val) == type(0.0):
                        values += str(val)
                    else:
                        values += val
                    if valCnt < len(agg.columns) - 1:
                        values += ","
                    valCnt += 1
                __insertData(values=values)
            else:
                self.logger.log('row = {}'.format(row), 'error')
                raise Exception('Row type not matched.')

        # End for row.
        self.conn.commit()

    def intervalAverages(self,
                         sums,
                         cnts,
                         timestamp,
                         timestampIndex,
                         subkeyIndex=None,
                         subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list
        :param cnts: list
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}
        """

        if subkey is not None:
            myAvgs = {}
            reportedAgg = False
            myAvgs[subkey] = []
            sumIndex = 0

            self.logger.log('key: {}'.format(subkey), 'debug')
            # Iterate over sums.
            for s in sums[subkey]:
                if sumIndex == timestampIndex:
                    myAvgs[subkey].append(timestamp)
                elif sumIndex == subkeyIndex:
                    myAvgs[subkey].append(subkey)
                else:
                    if cnts[subkey][sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log(
                                'Aggregating {} rows of data.'.format(
                                    cnts[subkey][sumIndex]), 'debug')
                            reportedAgg = True

                        myAvgs[subkey].append(s / cnts[subkey][sumIndex])
                    else:
                        myAvgs[subkey].append('NULL')
                sumIndex += 1
            return myAvgs
        else:
            myAvgs = []
            reportedAgg = False
            sumIndex = 0
            for s in sums:
                if sumIndex == timestampIndex:
                    myAvgs.append(timestamp)
                else:
                    if cnts[sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log(
                                'Aggregating {} rows of data.'.format(
                                    cnts[sumIndex]), 'debug')
                            reportedAgg = True
                        myAvgs.append(s / cnts[sumIndex])
                    else:
                        myAvgs.append('NULL')
                sumIndex += 1
            return myAvgs

    def dataParameters(self, dataType=''):
        """
        Parameters for a given data type.
        :param dataType: string
        :return: (aggType, timeColName, subkeyColName)
        """
        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except:
            self.logger.log('Unmatched data type {}.'.format(dataType))

    def aggregateAllData(self, dataType=''):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.
        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName,
                                                  dataType=dataType):
            self.logger.log('start, end: {}, {}'.format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime('%Y-%m-%d %H:%M:%S'),
                endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log('aggData row: {}'.format(row))

    def aggregateNewData(self, dataType=''):
        """
        Convenience method for aggregating new data.

        :param dataType:
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        (end, start) = \
            self.lastUnaggregatedAndAggregatedEndpoints(dataType).items()[0][1]

        self.logger.log(
            'datatype: {}; start, end: {}, {}; end type: {}'.format(
                dataType, start, end, type(end)), 'critical')

        if end is None:
            # When there are no unaggregated endpoints the endpoint list is
            # empty, which currently only happens for the egauge type. The
            # other raw types do not produce this case because their
            # fractional-minute readings are not handled completely, but the
            # method still works for them without problems.
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        if self.incrementEndpoint(start) >= end:
            self.logger.log('Nothing to aggregate.')
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=self.incrementEndpoint(start).strftime(
                '%Y-%m-%d %H:%M:%S'),
            endDate=end.strftime('%Y-%m-%d %H:%M:%S'))
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log('aggData row: {}'.format(row))

        self.logger.log('{} rows aggregated for {}.'.format(
            len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.
        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint + a predefined
        amount of minutes.
        """
        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=''):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.
        :param dataType:
        :return: dict with tuple.
        """
        self.logger.log('datatype {}'.format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log('subkey colname {}'.format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType,
            aggDataType=aggType,
            timeColumnName=timeColName,
            idColumnName=subkeyColName)

        self.logger.log('unagg endpoints: {}'.format(unAggregatedEndpoints))
        return {
            dataType:
            (unAggregatedEndpoints[-1]
             if unAggregatedEndpoints != [] else None,
             self.lastAggregationEndpoint(aggDataType=aggType,
                                          timeColumnName=timeColName))
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.
        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """
        return {
            x.keys()[0]: (x.values()[0])
            for x in map(self.lastUnaggregatedAndAggregatedEndpoints,
                         [k for k in self.dataParams])
        }

    def monthStartsAndEnds(self, timeColumnName='', dataType=''):
        """
        Return the first date and last date for the given **raw** data type for
        each month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log('datatype {}'.format(dataType), 'debug')
        (start,
         end) = self.rows("""SELECT MIN({}), MAX({}) FROM \"{}\"""".format(
             timeColumnName, timeColumnName, self.tables[dataType]))[0]
        self.logger.log('start {}'.format(start))
        self.logger.log('end {}'.format(end))

        # End time needs transforming in split dates to extend the end of the
        # day to 23:59:59.

        splitDates = self.timeUtil.splitDates(start, end)

        startEndDatesTransform = []
        i = 0
        while i < len(splitDates):
            startEndDatesTransform.append(
                (splitDates[i][0],
                 self.incrementEndpoint(
                     datetime(splitDates[i][1].timetuple()[0],
                              splitDates[i][1].timetuple()[1],
                              splitDates[i][1].timetuple()[2], 23, 59, 59))))
            i += 1

        return startEndDatesTransform

    def aggregatedData(self,
                       dataType='',
                       aggregationType='',
                       timeColumnName='',
                       subkeyColumnName='',
                       startDate='',
                       endDate=''):
        """
        ***********************************************************************
        Provide aggregated data.
        ***********************************************************************

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
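        # ci maps a column name to its index within the comma-separated
        # columns string for this data type; it is used to index raw DB rows.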
        ci = lambda col_name: self.columns[dataType].split(',').index(col_name)

        rowCnt = 0

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(dataType=dataType,
                                     timestampCol=timeColumnName,
                                     subkeyCol=subkeyColumnName,
                                     startDate=startDate,
                                     endDate=endDate)

        self.logger.log('subkeys: {}'.format(mySubkeys), 'debug')

        def __initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.
            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                sums = []
                cnts = []
                for i in range(len(self.columns[dataType].split(','))):
                    sums.append(0)
                    cnts.append(0)
            else:
                if not subkey:
                    for i in range(len(self.columns[dataType].split(','))):
                        for k in mySubkeys:
                            if k not in sums.keys():
                                sums[k] = []
                                cnts[k] = []
                            sums[k].append(0)
                            cnts[k].append(0)
                else:
                    sums[subkey] = []
                    for i in range(len(self.columns[dataType].split(','))):
                        sums[subkey].append(0)
                    cnts[subkey] = []
                    for i in range(len(self.columns[dataType].split(','))):
                        cnts[subkey].append(0)

            return (sums, cnts)

        (sum, cnt) = __initSumAndCount()

        def __initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.
            :returns: None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log('subkeys to check: {}'.format(subkeysToCheck),
                            'debug')

            if mySubkeys:
                for row in self.rawData(
                        dataType=dataType,
                        orderBy=[timeColumnName, subkeyColumnName],
                        timestampCol=timeColumnName,
                        startDate=startDate,
                        endDate=endDate):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck != []:
                        if row[ci(subkeyColumnName)] in subkeysToCheck:
                            subkeysToCheck.remove(row[ci(subkeyColumnName)])
                        minute = row[ci(
                            timeColumnName)].timetuple()[MINUTE_POSITION]

                        if minute <= 15:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 15
                        elif minute <= 30:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 30
                        elif minute <= 45:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 45
                        elif minute == 0 or minute <= 59:
                            self.nextMinuteCrossing[row[ci(
                                subkeyColumnName)]] = 0
                        else:
                            raise Exception(
                                'Unable to determine next minute crossing')
                        self.logger.log(
                            'next min crossing for {} = {}'.format(
                                row[ci(subkeyColumnName)],
                                self.nextMinuteCrossing[row[ci(
                                    subkeyColumnName)]]), 'debug')
                    else:
                        break

            else:
                # Non-subkey case e.g. weather data.
                rowCnt = 0
                # @todo Optimize by querying only the first row.
                for row in self.rawData(dataType=dataType,
                                        orderBy=[timeColumnName],
                                        timestampCol=timeColumnName,
                                        startDate=startDate,
                                        endDate=endDate):
                    minute = row[ci(
                        timeColumnName)].timetuple()[MINUTE_POSITION]
                    if minute <= 15:
                        self.nextMinuteCrossingWithoutSubkeys = 15
                    elif minute <= 30:
                        self.nextMinuteCrossingWithoutSubkeys = 30
                    elif minute <= 45:
                        self.nextMinuteCrossingWithoutSubkeys = 45
                    elif minute == 0 or minute <= 59:
                        self.nextMinuteCrossingWithoutSubkeys = 0
                    else:
                        raise Exception(
                            'Unable to determine next minute crossing')
                    self.logger.log(
                        'next min crossing = {}'.format(
                            self.nextMinuteCrossingWithoutSubkeys), 'debug')
                    rowCnt += 1
                    if rowCnt > 0:
                        break

        __initIntervalCrossings()

        for row in self.rawData(dataType=dataType,
                                orderBy=[timeColumnName, subkeyColumnName],
                                timestampCol=timeColumnName,
                                startDate=startDate,
                                endDate=endDate):

            if mySubkeys:
                for col in self.columns[dataType].split(','):
                    if self.mathUtil.isNumber(
                            row[ci(col)]) and ci(col) != ci(subkeyColumnName):
                        sum[row[ci(subkeyColumnName)]][ci(col)] += row[ci(col)]
                        cnt[row[ci(subkeyColumnName)]][ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute,
                                        subkey=row[ci(subkeyColumnName)]):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log(
                        'key: {}'.format(row[ci(subkeyColumnName)]), 'debug')
                    aggData += [
                        self.intervalAverages(sum, cnt,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName),
                                              ci(subkeyColumnName),
                                              row[ci(subkeyColumnName)])
                    ]
                    self.logger.log('minute crossed {}'.format(minuteCrossed),
                                    'DEBUG')

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    (sum,
                     cnt) = __initSumAndCount(subkey=row[ci(subkeyColumnName)],
                                              sums=sum,
                                              cnts=cnt)
            else:
                for col in self.columns[dataType].split(','):
                    if self.mathUtil.isNumber(row[ci(col)]):
                        sum[ci(col)] += row[ci(col)]
                        cnt[ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [
                        self.intervalAverages(sum, cnt,
                                              row[ci(timeColumnName)],
                                              ci(timeColumnName))
                    ]
                    (sum, cnt) = __initSumAndCount(subkey=None,
                                                   sums=sum,
                                                   cnts=cnt)

            rowCnt += 1

        self.logger.log('aggdata = {}'.format(aggData), 'debug')
        return MSGAggregatedData(aggregationType=aggregationType,
                                 columns=self.columns[dataType].split(','),
                                 data=aggData)
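
# A minimal driver sketch (not shown on the original listing) exercising the
# public API described in the class docstring above; it assumes a configured
# MSG environment containing the four raw data types defined in dataParams.
if __name__ == '__main__':
    aggregator = MSGDataAggregator(exitOnError=False)
    for dataType in ['irradiance', 'weather', 'circuit', 'egauge']:
        counts = aggregator.aggregateNewData(dataType=dataType)
        print 'aggregated endpoint counts: %s' % counts
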
from msg_noaa_weather_data_inserter import MSGNOAAWeatherDataInserter
from msg_db_connector import MSGDBConnector
from msg_time_util import MSGTimeUtil
from msg_noaa_weather_data_util import MSGWeatherDataUtil


configer = MSGConfiger()
logger = SEKLogger(__name__, 'info')
binPath = configer.configOptionValue("Executable Paths", "msg_bin_path")
COMMAND_LINE_ARGS = None
msgBody = ''
notifier = MSGNotifier()
dataParser = MSGNOAAWeatherDataParser()
inserter = MSGNOAAWeatherDataInserter()
timeUtil = MSGTimeUtil()


def processCommandLineArguments():
    global COMMAND_LINE_ARGS
    argParser = argparse.ArgumentParser(
        description = 'Perform recursive insertion of compressed weather data'
                      ' contained in the current directory to the MECO '
                      'database specified in the configuration file.')
    argParser.add_argument('--email', action = 'store_true', default = False,
                           help = 'Send email notification if this flag is '
                                  'specified.')
    argParser.add_argument('--testing', action = 'store_true', default = False,
                           help = 'If this flag is on, '
                                  'insert data to the testing database as '
                                  'specified in the local configuration file.')
from msg_noaa_weather_data_parser import MSGNOAAWeatherDataParser
from msg_noaa_weather_data_inserter import MSGNOAAWeatherDataInserter
from msg_db_connector import MSGDBConnector
from msg_time_util import MSGTimeUtil
from msg_noaa_weather_data_util import MSGWeatherDataUtil

configer = MSGConfiger()
logger = SEKLogger(__name__, 'info')
binPath = configer.configOptionValue("Executable Paths", "msg_bin_path")
COMMAND_LINE_ARGS = None
msgBody = ''
notifier = MSGNotifier()
dataParser = MSGNOAAWeatherDataParser()
inserter = MSGNOAAWeatherDataInserter()
timeUtil = MSGTimeUtil()


def processCommandLineArguments():
    global COMMAND_LINE_ARGS
    argParser = argparse.ArgumentParser(
        description='Perform recursive insertion of compressed weather data'
        ' contained in the current directory to the MECO '
        'database specified in the configuration file.')
    argParser.add_argument('--email',
                           action='store_true',
                           default=False,
                           help='Send email notification if this flag is '
                           'specified.')
    argParser.add_argument('--testing',
                           action='store_true',
                           default=False,
                           help='If this flag is on, '
                           'insert data to the testing database as '
                           'specified in the local configuration file.')
Example #14
class MSGDBExporter(object):
    """
    Export MSG DBs as SQL scripts.

    Supports export to local storage and to cloud storage.

    Usage:

    from msg_db_exporter import MSGDBExporter
    exporter = MSGDBExporter()

    Public API:

    exportDB(databases:List, 
             toCloud:Boolean, 
             testing:Boolean,
             numChunks:Integer, 
             deleteOutdated:Boolean): Export a list of DBs to the cloud.
    """

    # List of cloud files.
    @property
    def cloudFiles(self):
        self._cloudFiles = self.driveService.files().list().execute()
        return self._cloudFiles

    @property
    def driveService(self):
        if self._driveService:
            return self._driveService

        if not self.credentialPath:
            raise Exception("Credential path is required.")
        storage = Storage('{}/google_api_credentials'.format(
            self.credentialPath))

        self.googleAPICredentials = storage.get()

        self.logger.log("Authorizing credentials.", 'info')
        http = httplib2.Http()
        http = self.googleAPICredentials.authorize(http)

        self.logger.log("Authorized.", 'info')

        self._driveService = build('drive', 'v2', http=http)

        return self._driveService

    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG', useColor=False)
        self.timeUtil = MSGTimeUtil()
        self.configer = MSGConfiger()
        self.fileUtil = MSGFileUtil()
        self.pythonUtil = MSGPythonUtil()  # for debugging
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = SEKNotifier(
            connector=self.connector,
            dbUtil=self.dbUtil,
            user=self.configer.configOptionValue('Notifications',
                                                 'email_username'),
            password=self.configer.configOptionValue('Notifications',
                                                     'email_password'),
            fromaddr=self.configer.configOptionValue('Notifications',
                                                     'email_from_address'),
            toaddr=self.configer.configOptionValue('Notifications',
                                                   'email_recipients'),
            testing_toaddr=self.configer.configOptionValue(
                'Notifications', 'testing_email_recipients'),
            smtp_server_and_port=self.configer.configOptionValue(
                'Notifications', 'smtp_server_and_port'))

        # Google Drive parameters.
        self.clientID = self.configer.configOptionValue(
            'Export', 'google_api_client_id')
        self.clientSecret = self.configer.configOptionValue(
            'Export', 'google_api_client_secret')
        self.oauthScope = 'https://www.googleapis.com/auth/drive'
        self.oauthConsent = 'urn:ietf:wg:oauth:2.0:oob'
        self.googleAPICredentials = ''
        self.exportTempWorkPath = self.configer.configOptionValue(
            'Export', 'db_export_work_path')

        self.credentialPath = self.configer.configOptionValue(
            'Export', 'google_api_credentials_path')
        self.credentialStorage = Storage('{}/google_api_credentials'.format(
            self.credentialPath))

        self._driveService = None
        self._cloudFiles = None
        self.postAgent = 'Maui Smart Grid 1.0.0 DB Exporter'
        self.retryDelay = 10
        self.availableFilesURL = ''

    def verifyExportChecksum(self, testing=False):
        """
        Verify the compressed export file using a checksum.

        * Save the checksum of the original uncompressed export data.
        * Extract the compressed file.
        * Verify the uncompressed export data.

        :param testing: When set to True, Testing Mode is used.
        """

        # Get the checksum of the original file.
        md5sum = self.fileUtil.md5Checksum(self.exportTempWorkPath)
        self.logger.log('md5sum: {}'.format(md5sum))
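        # @todo Extract the compressed file and verify the uncompressed data
        # against the saved checksum, per the remaining steps in the
        # docstring above.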

    def db_username(self):
        return "postgres"
        # return self.configer.configOptionValue('Database', 'db_username')

    def db_password(self):
        return self.configer.configOptionValue('Database', 'db_password')

    def db_port(self):
        return self.configer.configOptionValue('Database', 'db_port')

    def dumpCommand(self, db='', dumpName=''):
        """
        Build the shell command used to export a DB. The command takes the
        form

        sudo -u postgres pg_dump -p ${PORT}
                                 -U ${USERNAME}
                                 [-T ${OPTIONAL_TABLE_EXCLUSIONS}]
                                 ${DB_NAME} >
                                 ${EXPORT_TEMP_WORK_PATH}/${DUMP_NAME}.sql

        :param db: String of the DB name to be dumped.
        :param dumpName: String of the dump file name (without extension).
        :return: String of command used to export DB.
        """

        # For reference only:
        # Password is passed from ~/.pgpass.
        # Note that ':' and '\' characters should be escaped with '\'.
        # Ref: http://www.postgresql.org/docs/9.1/static/libpq-pgpass.html

        # Dump databases as the superuser. This method does not require a
        # stored password when running under a root crontab.
        if not db or not dumpName:
            raise Exception('DB and dumpname required.')

        # Process exclusions.

        exclusions = self.dumpExclusionsDictionary()
        excludeList = []
        # The exclusions dictionary may be None when none are configured.
        if exclusions and db in exclusions:
            excludeList = exclusions[db]
        excludeString = ''
        for e in excludeList:
            excludeString += """-T '"{}"' """.format(e)

        return 'sudo -u postgres pg_dump -p {0} -U {1} {5} {2} > {3}/{4}' \
               '.sql'.format(self.db_port(), self.db_username(), db,
                             self.exportTempWorkPath, dumpName, excludeString)
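        # Illustrative only: with db='egauge_db', dumpName='20140101_egauge_db'
        # and an excluded table 'BigRawTable' (all hypothetical values, with a
        # hypothetical port of 5432), the generated command resembles:
        #   sudo -u postgres pg_dump -p 5432 -U postgres -T '"BigRawTable"'
        #       egauge_db > /export/work/20140101_egauge_db.sql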

    def dumpExclusionsDictionary(self):
        """
        :return: Dictionary with keys as DB names and values as lists of
        tables to be excluded for a given database, or None if no valid
        exclusions are configured.
        """
        try:
            exclusions = eval(
                self.configer.configOptionValue('Export',
                                                'db_export_exclusions'))
            return exclusions if isinstance(exclusions, dict) else None
        except SyntaxError as detail:
            self.logger.log(
                'SyntaxError exception while getting exclusions: {}'.format(
                    detail))

    def dumpName(self, db=''):
        """
        :param db: String
        :return: String of file name used for dump file of db.
        """
        if not db:
            raise Exception('DB required.')
        return "{}_{}".format(self.timeUtil.conciseNow(), db)

    def filesToUpload(self, compressedFullPath='', numChunks=0, chunkSize=0):
        """
        :param compressedFullPath: String
        :param numChunks: Int
        :param chunkSize: Int
        :return: List of files to be uploaded according to their split
        sections, if applicable.
        """
        if numChunks != 0:
            self.logger.log('Splitting {}'.format(compressedFullPath), 'DEBUG')

            filesToUpload = self.fileUtil.splitLargeFile(
                fullPath=compressedFullPath,
                chunkSize=chunkSize,
                numChunks=numChunks)

            if not filesToUpload:
                raise Exception('Exception during file splitting.')
            else:
                self.logger.log('to upload: {}'.format(filesToUpload), 'debug')
                return filesToUpload

        else:
            return [compressedFullPath]

    def dumpResult(self, db='', dumpName='', fullPath=''):
        """
        :param db: String of the DB name to be dumped.
        :param dumpName: String of filename of dump file.
        :param fullPath: String of full path to dump file.
        :return: Boolean True if the dump operation was successful. On a
        pg_dump failure the process exits rather than returning False.
        """

        success = True

        self.logger.log('fullPath: {}'.format(fullPath), 'DEBUG')

        try:
            # Generate the SQL script export.
            # @todo check return value of dump command
            self.logger.log('cmd: {}'.format(
                self.dumpCommand(db=db, dumpName=dumpName)))
            subprocess.check_call(self.dumpCommand(db=db, dumpName=dumpName),
                                  shell=True)
        except subprocess.CalledProcessError as error:
            self.logger.log("Exception while dumping: {}".format(error))
            sys.exit(-1)

        return success

    def exportDBs(self,
                  databases=None,
                  toCloud=False,
                  localExport=True,
                  testing=False,
                  chunkSize=0,
                  deleteOutdated=False):
        """
        Export a set of DBs to local storage.

        :param databases: List of database names that will be exported.
        :param toCloud: Boolean if set to True, then the export will also be
        copied to cloud storage.
        :param localExport: Boolean when set to True the DB is exported
        locally.
        :param testing: Boolean flag for testing mode. (@DEPRECATED)
        :param chunkSize: Integer size in bytes of chunk size used for
        splitting.
        :param deleteOutdated: Boolean indicating outdated files in the cloud
        should be removed.
        :returns: List of file IDs of uploaded files or None if there is an
        error condition.
        """

        # @todo separate uploading and exporting functions

        noErrors = True
        uploaded = []

        for db in databases:
            self.logger.log('Exporting {} using pg_dump.'.format(db), 'info')

            dumpName = self.dumpName(db=db)
            fullPath = '{}/{}.sql'.format(self.exportTempWorkPath, dumpName)
            if localExport:
                noErrors = self.dumpResult(db, dumpName, fullPath)

            # Perform compression of the file.
            self.logger.log("Compressing {} using gzip.".format(db), 'info')
            self.logger.log('fullpath: {}'.format(fullPath), 'DEBUG')

            gzipResult = self.fileUtil.gzipCompressFile(fullPath)
            compressedFullPath = '{}{}'.format(fullPath, '.gz')
            numChunks = self.numberOfChunksToUse(compressedFullPath)

            # Gzip uncompress and verify by checksum is disabled until a more
            # efficient, non-memory-based, uncompress is implemented.
            # md5sum1 = self.fileUtil.md5Checksum(fullPath)
            # self.md5Verification(compressedFullPath=compressedFullPath,
            # fullPath=fullPath,md5sum1=md5sum1)

            if toCloud:
                # Split compressed files into a set of chunks to improve the
                # reliability of uploads.

                # Upload the files to the cloud.
                for f in self.filesToUpload(
                        compressedFullPath=compressedFullPath,
                        numChunks=numChunks,
                        chunkSize=chunkSize):
                    self.logger.log('Uploading {}.'.format(f), 'info')
                    fileID = self.uploadFileToCloudStorage(
                        fullPath=f,
                        testing=testing,
                        retryCount=int(
                            self.configer.configOptionValue(
                                'Export', 'export_retry_count')))

                    self.logger.log('file id after upload: {}'.format(fileID))

                    if fileID != None:
                        uploaded.append(fileID)
                        self.logger.log('uploaded: {}'.format(uploaded),
                                        'DEBUG')
                        if not self.addReaders(
                                fileID,
                                self.configer.configOptionValue(
                                    'Export',
                                    'reader_permission_email_addresses').split(
                                        ','),
                                retryCount=int(
                                    self.configer.configOptionValue(
                                        'Export', 'export_retry_count'))):
                            self.logger.log(
                                'Failed to add readers for {}.'.format(f),
                                'error')
                        self.logSuccessfulExport(
                            *self.metadataOfFileID(fileID))

                    # Remove split sections if they exist.
                    try:
                        if not testing and numChunks > 1:
                            self.logger.log('Removing {}'.format(f))
                            os.remove('{}'.format(f))
                    except OSError as error:
                        self.logger.log(
                            'Exception while removing {}: {}.'.format(
                                fullPath, error))
                        noErrors = False

            # End if toCloud.

            if gzipResult:
                self.moveToFinalPath(compressedFullPath=compressedFullPath)

            # Remove the uncompressed file.
            try:
                if not testing:
                    self.logger.log('Removing {}'.format(fullPath))
                    os.remove('{}'.format(fullPath))
            except OSError as error:
                self.logger.log('Exception while removing {}: {}.'.format(
                    fullPath, error))
                noErrors = False

        # End for db in databases.

        if deleteOutdated:
            self.deleteOutdatedFiles(
                datetime.timedelta(days=int(
                    self.configer.configOptionValue('Export',
                                                    'export_days_to_keep'))))

        return uploaded if noErrors else None

    def moveToFinalPath(self, compressedFullPath=''):
        """
        Move a compressed file to the final export path.
        :param compressedFullPath: String for the compressed file.
        :return:
        """
        self.logger.log('Moving {} to final path.'.format(compressedFullPath),
                        'debug')
        try:
            shutil.move(
                compressedFullPath,
                self.configer.configOptionValue('Export',
                                                'db_export_final_path'))
        except Exception as detail:
            self.logger.log(
                'Exception while moving {} to final export path: {}'.format(
                    compressedFullPath, detail), 'error')

    def md5Verification(self, compressedFullPath='', fullPath='', md5sum1=''):
        """
        Perform md5 verification of a compressed file at compressedFullPath
        where the original file is at fullPath and has md5sum1.

        :param compressedFullPath: String
        :param fullPath: String
        :param md5sum1: String of md5sum of source file.
        :return:
        """

        GZIP_UNCOMPRESS_FILE = False
        if GZIP_UNCOMPRESS_FILE:
            # Verify the compressed file by uncompressing it and
            # verifying its
            # checksum against the original checksum.
            self.logger.log('reading: {}'.format(compressedFullPath), 'DEBUG')
            self.logger.log(
                'writing: {}'.format(
                    os.path.join(
                        self.configer.configOptionValue(
                            'Testing', 'export_test_data_path'),
                        os.path.splitext(os.path.basename(fullPath))[0])),
                'DEBUG')

            self.fileUtil.gzipUncompressFile(
                compressedFullPath,
                os.path.join(
                    self.configer.configOptionValue('Testing',
                                                    'export_test_data_path'),
                    fullPath))

        VERIFY_BY_CHECKSUM = False
        if VERIFY_BY_CHECKSUM:
            md5sum2 = self.fileUtil.md5Checksum(fullPath)

            self.logger.log(
                "mtime: {}, md5sum2: {}".format(
                    time.ctime(os.path.getmtime(fullPath)), md5sum2), 'INFO')

            if md5sum1 == md5sum2:
                self.logger.log(
                    'Compressed file has been validated by checksum.', 'INFO')
            else:
                noErrors = False

    def numberOfChunksToUse(self, fullPath):
        """
        Return the number of chunks to be used by the file splitter based on
        the file size of the file at fullPath.
        :param fullPath: String
        :returns: Int Number of chunks to create.
        """

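        # The chunk count is the integer division of the file size by the
        # split threshold; e.g. with a hypothetical max_bytes_before_split of
        # 1000000000 B, a 2500000000 B file yields 2 chunks. The remainder is
        # not counted here (see the note below).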
        fsize = os.path.getsize(fullPath)
        self.logger.log('fullpath: {}, fsize: {}'.format(fullPath, fsize))
        if (fsize >= int(
                self.configer.configOptionValue('Export',
                                                'max_bytes_before_split'))):
            # Note that this does not make use of the remainder in the division.
            chunks = int(fsize / int(
                self.configer.configOptionValue('Export',
                                                'max_bytes_before_split')))
            self.logger.log('Will split with {} chunks.'.format(chunks))
            return chunks
        self.logger.log('Will NOT split file.', 'debug')
        return 1

    def uploadFileToCloudStorage(self,
                                 fullPath='',
                                 retryCount=0,
                                 testing=False):
        """
        Export a file to cloud storage.

        :param fullPath: String of file to be exported.
        :param testing: Boolean when set to True, Testing Mode is used.
        :param retryCount: Int of number of times to retry the upload if
        there is a failure.
        :returns: String File ID on verified on upload; None if verification
        fails.
        """

        success = True
        myFile = os.path.basename(fullPath)

        self.logger.log('full path {}'.format(os.path.dirname(fullPath)),
                        'DEBUG')
        self.logger.log("Uploading {}.".format(myFile))

        result = {}
        try:
            media_body = MediaFileUpload(
                fullPath,
                mimetype='application/gzip-compressed',
                resumable=True)
            body = {
                'title': myFile,
                'description': 'Hawaii Smart Energy Project gzip '
                'compressed DB export.',
                'mimeType': 'application/gzip-compressed'
            }

            # Result is a Files resource.
            result = self.driveService.files().insert(
                body=body, media_body=media_body).execute()

        except Exception as detail:
            # Upload failures can result in a BadStatusLine.
            self.logger.log(
                "Exception while uploading {}: {}.".format(myFile, detail),
                'error')
            success = False

        if not self.__verifyMD5Sum(fullPath, self.fileIDForFileName(myFile)):
            self.logger.log('Failed MD5 checksum verification.', 'INFO')
            success = False

        if success:
            self.logger.log('Verification by MD5 checksum succeeded.', 'INFO')
            self.logger.log("Finished.")
            return result['id']

        if not success and retryCount <= 0:
            return None
        else:
            time.sleep(self.retryDelay)
            self.logger.log('Retrying upload of {}.'.format(fullPath),
                            'warning')
            return self.uploadFileToCloudStorage(fullPath=fullPath,
                                                 retryCount=retryCount - 1)

    def __retrieveCredentials(self):
        """
        Perform authorization at the server.

        Credentials are loaded into the object attribute googleAPICredentials.
        """

        flow = OAuth2WebServerFlow(self.clientID, self.clientSecret,
                                   self.oauthScope, self.oauthConsent)
        authorize_url = flow.step1_get_authorize_url()
        print 'Go to the following link in your browser: ' + authorize_url
        code = raw_input('Enter verification code: ').strip()
        self.googleAPICredentials = flow.step2_exchange(code)

        print "refresh_token = {}".format(
            self.googleAPICredentials.refresh_token)
        print "expiry = {}".format(self.googleAPICredentials.token_expiry)

    def freeSpace(self):
        """
        Get free space from the drive service.
        :returns: Int of free space (in bytes) on the drive service.
        """
        aboutData = self.driveService.about().get().execute()
        return int(aboutData['quotaBytesTotal']) - int(
            aboutData['quotaBytesUsed']) - int(
                aboutData['quotaBytesUsedInTrash'])

    def deleteFile(self, fileID=''):
        """
        Delete the file with ID fileID.
        :param fileID: String of a Google API file ID.
        """

        if not len(fileID) > 0:
            raise Exception("File ID has not been given.")

        self.logger.log(
            'Deleting file with file ID {} and name {}.'.format(
                fileID, self.filenameForFileID(fileID)), 'debug')

        try:
            # Writing the fileId arg name is required here.
            self.driveService.files().delete(fileId=fileID).execute()

        except errors.HttpError as error:
            self.logger.log('Exception while deleting: {}'.format(error),
                            'error')

    def deleteOutdatedFiles(self, maxAge=datetime.timedelta(weeks=9999999)):
        """
        Remove outdated files from cloud storage.

        :param maxAge: datetime.timedelta of the age at or beyond which a
        file is considered outdated and eligible for deletion.
        :returns: Int count of items submitted for deletion.
        """

        # @todo Return count of actual successfully deleted files.

        outdated = self.outdatedFiles(maxAge)
        """:type : dict"""
        for f in outdated:
            self.deleteFile(f['id'])

        return len(outdated)

    def outdatedFiles(self,
                      daysBeforeOutdated=datetime.timedelta(days=9999999)):
        """
        Return the files in the cloud that are outdated, where a file is
        outdated if its age is greater than or equal to daysBeforeOutdated.

        Note: When t1 is the same day as t2, the timedelta comes back as -1.
        Not sure why this isn't represented as zero. Perhaps to avoid a false
        evaluation of a predicate on a tdelta.

        :param daysBeforeOutdated: datetime.timedelta giving the age at or
        beyond which a file is considered outdated.
        :return: List of cloud file items that are outdated.
        """

        t1 = lambda x: datetime.datetime.strptime(x['createdDate'],
                                                  "%Y-%m-%dT%H:%M:%S.%fZ")
        t2 = datetime.datetime.now()

        return filter(lambda x: t2 - t1(x) >= daysBeforeOutdated,
                      self.cloudFiles['items'])

    def sendNotificationOfFiles(self):
        """
        Provide a notification that lists the export files along with sharing
        links.
        """

        pass

    def sendDownloadableFiles(self):
        """
        Send available files via HTTP POST.
        :returns: None
        """

        myPath = '{}/{}'.format(self.exportTempWorkPath,
                                'list-of-downloadable-files.txt')

        content = self.markdownListOfDownloadableFiles()

        fp = open(myPath, 'wb')

        output = StringIO()
        output.write(content)

        fp.write(content)
        fp.close()

        headers = {'User-Agent': self.postAgent, 'Content-Type': 'text/html'}
        try:
            r = requests.post(self.configer.configOptionValue(
                'Export', 'export_list_post_url'),
                              output.getvalue(),
                              headers=headers)
            print 'text: {}'.format(r.text)
        except requests.adapters.SSLError as error:
            # @todo Implement alternative verification.
            self.logger.log('SSL error: {}'.format(error), 'error')

        output.close()

    def metadataOfFileID(self, fileID=''):
        """
        :param fileID: String of a file ID in the cloud.
        :return: Tuple of metadata (name, url, timestamp, size) for a given
        file ID.
        """
        item = [i for i in self.cloudFiles['items'] if i['id'] == fileID][0]
        return (item[u'originalFilename'], item[u'webContentLink'],
                item[u'createdDate'], item[u'fileSize'])

    def listOfDownloadableFiles(self):
        """
        Create a list of downloadable files.
        :returns: List of dicts of files that are downloadable from the cloud.
        """

        files = []
        for i in reversed(
                sorted(self.cloudFiles['items'],
                       key=lambda k: k['createdDate'])):
            item = dict()
            item['title'] = i['title']
            item['webContentLink'] = i['webContentLink']
            item['id'] = i['id']
            item['createdDate'] = i['createdDate']
            item['fileSize'] = i['fileSize']
            files.append(item)
        return files

    def markdownListOfDownloadableFiles(self):
        """
        Generate content containing a list of downloadable files in Markdown
        format.

        :returns: String content in Markdown format.
        """

        content = "||*Name*||*Created*||*Size*||\n"
        for i in self.listOfDownloadableFiles():
            content += "||[`{}`]({})".format(i['title'], i['webContentLink'])
            content += "||`{}`".format(i['createdDate'])
            content += "||`{} B`||".format(int(i['fileSize']))
            content += '\n'

        # self.logger.log('content: {}'.format(content))
        return content

    def plaintextListOfDownloadableFiles(self):
        """
        Generate content containing a list of downloadable files in plaintext
        format.

        :returns: String content as plaintext.
        """
        content = ''
        includeLink = False
        for i in reversed(
                sorted(self.cloudFiles['items'],
                       key=lambda k: k['createdDate'])):
            if includeLink:
                content += "{}, {}, {}, {} B\n".format(i['title'],
                                                       i['webContentLink'],
                                                       i['createdDate'],
                                                       int(i['fileSize']))
            else:
                content += "{}, {}, {} B\n".format(i['title'],
                                                   i['createdDate'],
                                                   int(i['fileSize']))

        return content

    def logSuccessfulExport(self, name='', url='', datetime=0, size=0):
        """
        When an export has been successful, log information about the export
        to the database.

        The items to log include:
        * filename
        * URL
        * timestamp
        * filesize

        :param name: String
        :param url: String
        :param datetime: String of the export timestamp.
        :param size: Int
        :return: True if no errors occurred, else False.
        """
        def exportHistoryColumns():
            return ['name', 'url', 'timestamp', 'size']

        def timestamp(dt):
            # A zero value is stored as the Unix epoch.
            return 'to_timestamp(0)' if dt == 0 else "timestamp '{}'".format(dt)

        sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
            self.configer.configOptionValue('Export', 'export_history_table'),
            ','.join(exportHistoryColumns()), "'" + name + "'",
            "'" + url + "'", timestamp(datetime), size)

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        result = dbUtil.executeSQL(cursor, sql, exitOnFail=False)
        conn.commit()
        return result

    def sendExportSummary(self, summary=''):
        """
        Send a summary of exports via email to a preconfigured list of
        recipients.
        :param summary: String of summary content.
        :return:
        """
        try:
            if self.notifier.sendNotificationEmail(summary, testing=False):
                self.notifier.recordNotificationEvent(
                    types=MSGNotificationHistoryTypes,
                    noticeType=MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        except Exception as detail:
            self.logger.log('Exception occurred: {}'.format(detail), 'ERROR')

    def currentExportSummary(self):
        """
        Current summary of exports since the last summary report time.

        Summaries are reported with identifier MSG_EXPORT_SUMMARY in the
        NotificationHistory.

        Includes:
        * Number of databases exported
        * Total number of files in the cloud.
        * A report of available storage capacity.
        * A list of available DBs.
        * A link where exports can be accessed.

        :return: String of summary text.
        """
        availableFilesURL = self.configer.configOptionValue(
            'Export', 'export_list_url')
        lastReportDate = self.notifier.lastReportDate(
            types=MSGNotificationHistoryTypes,
            noticeType=MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        content = 'Cloud Export Summary:\n\n'
        content += 'Last report date: {}\n'.format(lastReportDate)

        # @TO BE REVIEWED: Verify time zone adjustment.
        content += '{} databases have been exported since the last report ' \
                   'date.\n'.format(self.countOfDBExports(
            lastReportDate + datetime.timedelta(
                hours = 10)) if lastReportDate else self.countOfDBExports())

        content += '{} B free space is available.\n'.format(self.freeSpace())
        content += '\nCurrently available DBs:\n'
        content += self.plaintextListOfDownloadableFiles()
        content += '\n{} files can be accessed through Google Drive (' \
                   'https://drive.google.com) or at {}.'.format(
            self.countOfCloudFiles(), availableFilesURL)

        return content

    def countOfDBExports(self, since=None):
        """
        :param since: datetime indicating last export datetime.
        :return: Int of count of exports.
        """
        myDatetime = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M')
        if not since:
            since = myDatetime('1900-01-01 00:00')
        self.logger.log(since.strftime('%Y-%m-%d %H:%M'), 'DEBUG')

        sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
              '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
            since.strftime('%Y-%m-%d %H:%M'))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        rows = None
        if dbUtil.executeSQL(cursor, sql, exitOnFail=False):
            rows = cursor.fetchall()
        assert len(rows) == 1, 'Invalid return value.'
        return rows[0][0]

    def countOfCloudFiles(self):
        """
        :return: Int count of files currently stored in the cloud.
        """
        return len(self.cloudFiles['items'])

    def __verifyMD5Sum(self, localFilePath, remoteFileID):
        """
        Verify that the local MD5 sum matches the MD5 sum for the remote file
        corresponding to an ID.

        This verifies that the uploaded file matches the local compressed
        export file.

        :param localFilePath: String of the full path of the local file.
        :param remoteFileID: String of the cloud ID for the remote file.
        :returns: Boolean True if the MD5 sums match, otherwise, False.
        """

        self.logger.log('remote file ID: {}'.format(remoteFileID))
        self.logger.log('local file path: {}'.format(localFilePath))

        # Get the md5sum for the local file.
        f = open(localFilePath, mode='rb')
        fContent = hashlib.md5()
        for buf in iter(partial(f.read, 128), b''):
            fContent.update(buf)
        localMD5Sum = fContent.hexdigest()
        f.close()

        self.logger.log('local md5: {}'.format(localMD5Sum), 'DEBUG')

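        # Compare the locally computed digest with the md5Checksum that the
        # Drive API reports in the file's metadata.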
        def verifyFile():
            # Get the MD5 sum for the remote file.
            for item in self.cloudFiles['items']:
                if (item['id'] == remoteFileID):
                    self.logger.log(
                        'remote md5: {}'.format(item['md5Checksum']), 'DEBUG')
                    if localMD5Sum == item['md5Checksum']:
                        return True
                    else:
                        return False

        try:
            if verifyFile():
                return True
            else:
                return False

        except errors.HttpError as detail:
            self.logger.log('HTTP error during MD5 verification.', 'error')

            time.sleep(10)

            if verifyFile():
                return True
            else:
                return False

    def fileIDForFileName(self, filename):
        """
        Get the file ID for the given filename.

        Multiple cloud files may share the same filename, but only the ID of
        the first match is returned.

        :param filename: String of the filename for which to retrieve the ID.
        :returns: String of a cloud file ID or None if no match.
        """
        fileIDList = filter(lambda x: x['originalFilename'] == filename,
                            self.cloudFiles['items'])
        return fileIDList[0]['id'] if len(fileIDList) > 0 else None

    def filenameForFileID(self, fileID=''):
        """
        :param fileID: String of cloud-based file ID.
        :return: String of filename for a given file ID.
        """
        return filter(lambda x: x['id'] == fileID,
                      self.cloudFiles['items'])[0]['originalFilename']

    def addReaders(self, fileID=None, emailAddressList=None, retryCount=0):
        """
        Add reader permission to an export file that has been uploaded to the
        cloud for the given list of email addresses.

        Email notification is suppressed by default.

        :param fileID: String of the cloud file ID to be processed.
        :param emailAddressList: List of email addresses.
        :param retryCount: Int of number of times to retry on failure.
        :returns: Boolean True if successful, otherwise False.
        """
        success = True

        self.logger.log('file id: {}'.format(fileID))
        self.logger.log('address list: {}'.format(emailAddressList))

        for addr in emailAddressList:
            permission = {'value': addr, 'type': 'user', 'role': 'reader'}

            if fileID:
                try:
                    resp = self.driveService.permissions().insert(
                        fileId=fileID,
                        sendNotificationEmails=False,
                        body=permission).execute()
                    self.logger.log(
                        'Reader permission added for {}.'.format(addr))
                except errors.HttpError as error:
                    self.logger.log('An error occurred: {}'.format(error))
                    success = False

        if not success and retryCount <= 0:
            return False
        elif success:
            return True
        else:
            time.sleep(self.retryDelay)
            self.logger.log(
                'Retrying adding readers for ID {}.'.format(fileID), 'warning')
            return self.addReaders(fileID=fileID,
                                   emailAddressList=emailAddressList,
                                   retryCount=retryCount - 1)
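

# A minimal usage sketch, not part of the original module: it assumes the
# configuration sections referenced above are populated, and the database
# names and chunk size below are placeholder values.
if __name__ == '__main__':
    exporter = MSGDBExporter()

    # Export two hypothetical databases locally, mirror them to cloud
    # storage in roughly 100 MB chunks, and prune outdated cloud copies.
    uploadedIDs = exporter.exportDBs(databases=['egauge_db', 'weather_db'],
                                     toCloud=True,
                                     localExport=True,
                                     chunkSize=100 * 1024 * 1024,
                                     deleteOutdated=True)

    if uploadedIDs is None:
        exporter.logger.log('Export finished with errors.', 'error')
    else:
        exporter.sendExportSummary(summary=exporter.currentExportSummary())

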
class MSGDBExporter(object):
    """
    Export MSG DBs as SQL scripts.

    Supports export to local storage and to cloud storage.

    Usage:

    from msg_db_exporter import MSGDBExporter
    exporter = MSGDBExporter()

    Public API:

    exportDBs(databases:List,
              toCloud:Boolean,
              localExport:Boolean,
              testing:Boolean,
              chunkSize:Integer,
              deleteOutdated:Boolean): Export a list of DBs locally and,
              optionally, to the cloud.
    """

    # List of cloud files.
    @property
    def cloudFiles(self):
        self._cloudFiles = self.driveService.files().list().execute()
        return self._cloudFiles

    @property
    def driveService(self):
        if self._driveService:
            return self._driveService

        if not self.credentialPath:
            raise Exception("Credential path is required.")
        storage = Storage(
            '{}/google_api_credentials'.format(self.credentialPath))

        self.googleAPICredentials = storage.get()

        self.logger.log("Authorizing credentials.", 'info')
        http = httplib2.Http()
        http = self.googleAPICredentials.authorize(http)

        self.logger.log("Authorized.", 'info')

        self._driveService = build('drive', 'v2', http = http)

        return self._driveService


    def __init__(self):
        """
        Constructor.
        """

        self.logger = SEKLogger(__name__, 'DEBUG', useColor = False)
        self.timeUtil = MSGTimeUtil()
        self.configer = MSGConfiger()
        self.fileUtil = MSGFileUtil()
        self.pythonUtil = MSGPythonUtil()  # for debugging
        self.connector = MSGDBConnector()
        self.conn = self.connector.connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = SEKNotifier(connector = self.connector,
                                    dbUtil = self.dbUtil,
                                    user = self.configer.configOptionValue(
                                        'Notifications', 'email_username'),
                                    password = self.configer.configOptionValue(
                                        'Notifications', 'email_password'),
                                    fromaddr = self.configer.configOptionValue(
                                        'Notifications', 'email_from_address'),
                                    toaddr = self.configer.configOptionValue(
                                        'Notifications', 'email_recipients'),
                                    testing_toaddr =
                                    self.configer.configOptionValue(
                                        'Notifications',
                                        'testing_email_recipients'),
                                    smtp_server_and_port =
                                    self.configer.configOptionValue(
                                        'Notifications',
                                        'smtp_server_and_port'))

        # Google Drive parameters.
        self.clientID = self.configer.configOptionValue('Export',
                                                        'google_api_client_id')
        self.clientSecret = self.configer.configOptionValue('Export',
                                                            'google_api_client_secret')
        self.oauthScope = 'https://www.googleapis.com/auth/drive'
        self.oauthConsent = 'urn:ietf:wg:oauth:2.0:oob'
        self.googleAPICredentials = ''
        self.exportTempWorkPath = self.configer.configOptionValue('Export',
                                                                  'db_export_work_path')

        self.credentialPath = self.configer.configOptionValue('Export',
                                                              'google_api_credentials_path')
        self.credentialStorage = Storage(
            '{}/google_api_credentials'.format(self.credentialPath))

        self._driveService = None
        self._cloudFiles = None
        self.postAgent = 'Maui Smart Grid 1.0.0 DB Exporter'
        self.retryDelay = 10
        self.availableFilesURL = ''


    def verifyExportChecksum(self, testing = False):
        """
        Verify the compressed export file using a checksum.

        * Save the checksum of the original uncompressed export data.
        * Extract the compressed file.
        * Verify the uncompressed export data.

        :param testing: When set to True, Testing Mode is used.
        """

        # Get the checksum of the original file.
        md5sum = self.fileUtil.md5Checksum(self.exportTempWorkPath)
        self.logger.log('md5sum: {}'.format(md5sum))


    def db_username(self):
        return "postgres"
        # return self.configer.configOptionValue('Database', 'db_username')

    def db_password(self):
        return self.configer.configOptionValue('Database', 'db_password')

    def db_port(self):
        return self.configer.configOptionValue('Database', 'db_port')


    def dumpCommand(self, db = '', dumpName = ''):
        """
        This method makes use of

        pg_dump -s -p ${PORT}
                   -U ${USERNAME}
                   [-T ${OPTIONAL_TABLE_EXCLUSIONS}]
                   ${DB_NAME} >
                   ${EXPORT_TEMP_WORK_PATH}/${DUMP_TIMESTAMP}_{DB_NAME}.sql

        :param db: String
        :param dumpName: String
        :return: String of command used to export DB.
        """

        # For reference only:
        # Password is passed from ~/.pgpass.
        # Note that ':' and '\' characters should be escaped with '\'.
        # Ref: http://www.postgresql.org/docs/9.1/static/libpq-pgpass.html

        # Dump databases as the superuser. This method does not require a
        # stored password when running under a root crontab.
        if not db or not dumpName:
            raise Exception('DB and dumpname required.')

        # Process exclusions.

        exclusions = self.dumpExclusionsDictionary()
        excludeList = []
        # The exclusions dictionary may be None when none are configured.
        if exclusions and db in exclusions:
            excludeList = exclusions[db]
        excludeString = ''
        for e in excludeList:
            excludeString += """-T '"{}"' """.format(e)

        return 'sudo -u postgres pg_dump -p {0} -U {1} {5} {2} > {3}/{4}' \
               '.sql'.format(self.db_port(), self.db_username(), db,
                             self.exportTempWorkPath, dumpName, excludeString)


    def dumpExclusionsDictionary(self):
        """
        :param db: String of DB name for which to retrieve exclusions.
        :return: Dictionary with keys as DBs and values as lists of tables to
        be excluded for a given database.
        """
        try:
            if type(eval(self.configer.configOptionValue('Export',
                                                         'db_export_exclusions'))) == type(
                    {}):
                return eval(self.configer.configOptionValue('Export',
                                                            'db_export_exclusions'))
            else:
                return None
        except SyntaxError as detail:
            self.logger.log(
                'SyntaxError exception while getting exclusions: {}'.format(
                    detail))


    def dumpName(self, db = ''):
        """
        :param db: String
        :return: String of file name used for dump file of db.
        """
        if not db:
            raise Exception('DB required.')
        return "{}_{}".format(self.timeUtil.conciseNow(), db)


    def filesToUpload(self, compressedFullPath = '', numChunks = 0,
                      chunkSize = 0):
        """
        :param compressedFullPath: String
        :param numChunks: Int
        :param chunkSize: Int
        :return: List of files to be uploaded according to their split
        sections, if applicable.
        """
        if numChunks != 0:
            self.logger.log('Splitting {}'.format(compressedFullPath), 'DEBUG')

            filesToUpload = self.fileUtil.splitLargeFile(
                fullPath = compressedFullPath, chunkSize = chunkSize,
                numChunks = numChunks)

            if not filesToUpload:
                raise Exception('Exception during file splitting.')
            else:
                self.logger.log('to upload: {}'.format(filesToUpload), 'debug')
                return filesToUpload

        else:
            return [compressedFullPath]


    def dumpResult(self, db = '', dumpName = '', fullPath = ''):
        """
        :param dumpName: String of filename of dump file.
        :param fullPath: String of full path to dump file.
        :return: Boolean True if dump operation was successful, otherwise False.
        """

        success = True

        self.logger.log('fullPath: {}'.format(fullPath), 'DEBUG')

        try:
            # Generate the SQL script export.
            # @todo check return value of dump command
            self.logger.log('cmd: {}'.format(
                self.dumpCommand(db = db, dumpName = dumpName)))
            subprocess.check_call(
                self.dumpCommand(db = db, dumpName = dumpName), shell = True)
        except subprocess.CalledProcessError as error:
            self.logger.log("Exception while dumping: {}".format(error))
            sys.exit(-1)

        return success


    def exportDBs(self, databases = None, toCloud = False, localExport = True,
                  testing = False, chunkSize = 0, deleteOutdated = False):
        """
        Export a set of DBs to local storage.

        :param databases: List of database names that will be exported.
        :param toCloud: Boolean if set to True, then the export will also be
        copied to cloud storage.
        :param localExport: Boolean when set to True the DB is exported
        locally.
        :param testing: Boolean flag for testing mode. (@DEPRECATED)
        :param chunkSize: Integer size in bytes of chunk size used for
        splitting.
        :param deleteOutdated: Boolean indicating outdated files in the cloud
        should be removed.
        :returns: List of file IDs of uploaded files or None if there is an
        error condition.
        """

        # @todo separate uploading and exporting functions

        noErrors = True
        uploaded = []

        for db in databases:
            self.logger.log('Exporting {} using pg_dump.'.format(db), 'info')

            dumpName = self.dumpName(db = db)
            fullPath = '{}/{}.sql'.format(self.exportTempWorkPath, dumpName)
            if localExport:
                noErrors = self.dumpResult(db, dumpName, fullPath)

            # Perform compression of the file.
            self.logger.log("Compressing {} using gzip.".format(db), 'info')
            self.logger.log('fullpath: {}'.format(fullPath), 'DEBUG')

            gzipResult = self.fileUtil.gzipCompressFile(fullPath)
            compressedFullPath = '{}{}'.format(fullPath, '.gz')
            numChunks = self.numberOfChunksToUse(compressedFullPath)

            # Gzip uncompress and verify by checksum is disabled until a more
            # efficient, non-memory-based, uncompress is implemented.
            # md5sum1 = self.fileUtil.md5Checksum(fullPath)
            # self.md5Verification(compressedFullPath=compressedFullPath,
            # fullPath=fullPath,md5sum1=md5sum1)

            if toCloud:
                # Split compressed files into a set of chunks to improve the
                # reliability of uploads.

                # Upload the files to the cloud.
                for f in self.filesToUpload(
                        compressedFullPath = compressedFullPath,
                        numChunks = numChunks, chunkSize = chunkSize):
                    self.logger.log('Uploading {}.'.format(f), 'info')
                    fileID = self.uploadFileToCloudStorage(fullPath = f,
                                                           testing = testing,
                                                           retryCount = int(
                                                               self.configer.configOptionValue(
                                                                   'Export',
                                                                   'export_retry_count')))

                    self.logger.log('file id after upload: {}'.format(fileID))

                    if fileID != None:
                        uploaded.append(fileID)
                        self.logger.log('uploaded: {}'.format(uploaded),
                                        'DEBUG')
                        if not self.addReaders(fileID,
                                               self.configer.configOptionValue(
                                                       'Export',
                                                       'reader_permission_email_addresses').split(
                                                       ','), retryCount = int(
                                        self.configer.configOptionValue(
                                                'Export',
                                                'export_retry_count'))):
                            self.logger.log(
                                'Failed to add readers for {}.'.format(f),
                                'error')
                        self.logSuccessfulExport(*self.metadataOfFileID(fileID))

                    # Remove split sections if they exist.
                    try:
                        if not testing and numChunks > 1:
                            self.logger.log('Removing {}'.format(f))
                            os.remove('{}'.format(f))
                    except OSError as error:
                        self.logger.log(
                            'Exception while removing {}: {}.'.format(fullPath,
                                                                      error))
                        noErrors = False

            # End if toCloud.

            if gzipResult:
                self.moveToFinalPath(compressedFullPath = compressedFullPath)

            # Remove the uncompressed file.
            try:
                if not testing:
                    self.logger.log('Removing {}'.format(fullPath))
                    os.remove('{}'.format(fullPath))
            except OSError as error:
                self.logger.log(
                    'Exception while removing {}: {}.'.format(fullPath, error))
                noErrors = False

        # End for db in databases.

        if deleteOutdated:
            self.deleteOutdatedFiles(datetime.timedelta(days = int(
                self.configer.configOptionValue('Export',
                                                'export_days_to_keep'))))

        return uploaded if noErrors else None


    def moveToFinalPath(self, compressedFullPath = ''):
        """
        Move a compressed file to the final export path.
        :param compressedFullPath: String for the compressed file.
        :return:
        """
        self.logger.log('Moving {} to final path.'.format(compressedFullPath),
                        'debug')
        try:
            shutil.move(compressedFullPath,
                        self.configer.configOptionValue('Export',
                                                        'db_export_final_path'))
        except Exception as detail:
            self.logger.log(
                'Exception while moving {} to final export path: {}'.format(
                    compressedFullPath, detail), 'error')


    def md5Verification(self, compressedFullPath = '', fullPath = '',
                        md5sum1 = ''):
        """
        Perform md5 verification of a compressed file at compressedFullPath
        where the original file is at fullPath and has md5sum1.

        :param compressedFullPath: String
        :param fullPath: String
        :param md5sum1: String of md5sum of source file.
        :return:
        """

        GZIP_UNCOMPRESS_FILE = False
        if GZIP_UNCOMPRESS_FILE:
            # Verify the compressed file by uncompressing it and
            # verifying its
            # checksum against the original checksum.
            self.logger.log('reading: {}'.format(compressedFullPath), 'DEBUG')
            self.logger.log('writing: {}'.format(os.path.join(
                self.configer.configOptionValue('Testing',
                                                'export_test_data_path'),
                os.path.splitext(os.path.basename(fullPath))[0])), 'DEBUG')

            self.fileUtil.gzipUncompressFile(compressedFullPath, os.path.join(
                self.configer.configOptionValue('Testing',
                                                'export_test_data_path'),
                fullPath))

        VERIFY_BY_CHECKSUM = False
        if VERIFY_BY_CHECKSUM:
            md5sum2 = self.fileUtil.md5Checksum(fullPath)

            self.logger.log("mtime: {}, md5sum2: {}".format(
                time.ctime(os.path.getmtime(fullPath)), md5sum2), 'INFO')

            if md5sum1 == md5sum2:
                self.logger.log(
                    'Compressed file has been validated by checksum.', 'INFO')
            else:
                noErrors = False

    def numberOfChunksToUse(self, fullPath):
        """
        Return the number of chunks to be used by the file splitter based on
        the file size of the file at fullPath.
        :param fullPath: String
        :returns: Int Number of chunks to create.
        """

        fsize = os.path.getsize(fullPath)
        self.logger.log('fullpath: {}, fsize: {}'.format(fullPath, fsize))
        if (fsize >= int(self.configer.configOptionValue('Export',
                                                         'max_bytes_before_split'))):
            # Note that this does not make use of the remainder in the division.
            chunks = int(fsize / int(self.configer.configOptionValue('Export',
                                                                     'max_bytes_before_split')))
            self.logger.log('Will split with {} chunks.'.format(chunks))
            return chunks
        self.logger.log('Will NOT split file.', 'debug')
        return 1


    def uploadFileToCloudStorage(self, fullPath = '', retryCount = 0,
                                 testing = False):
        """
        Export a file to cloud storage.

        :param fullPath: String of file to be exported.
        :param testing: Boolean when set to True, Testing Mode is used.
        :param retryCount: Int of number of times to retry the upload if
        there is a failure.
        :returns: String File ID on verified on upload; None if verification
        fails.
        """

        success = True
        myFile = os.path.basename(fullPath)

        self.logger.log(
            'full path {}'.format(os.path.dirname(fullPath)), 'DEBUG')
        self.logger.log("Uploading {}.".format(myFile))

        result = {}
        try:
            media_body = MediaFileUpload(fullPath,
                                         mimetype =
                                         'application/gzip-compressed',
                                         resumable = True)
            body = {'title': myFile,
                    'description': 'Hawaii Smart Energy Project gzip '
                                   'compressed DB export.',
                    'mimeType': 'application/gzip-compressed'}

            # Result is a Files resource.
            result = self.driveService.files().insert(body = body,
                                                      media_body =
                                                      media_body).execute()

        except Exception as detail:
            # Upload failures can result in a BadStatusLine.
            self.logger.log(
                "Exception while uploading {}: {}.".format(myFile, detail),
                'error')
            success = False

        if not self.__verifyMD5Sum(fullPath, self.fileIDForFileName(myFile)):
            self.logger.log('Failed MD5 checksum verification.', 'INFO')
            success = False

        if success:
            self.logger.log('Verification by MD5 checksum succeeded.', 'INFO')
            self.logger.log("Finished.")
            return result['id']

        if not success and retryCount <= 0:
            return None
        else:
            time.sleep(self.retryDelay)
            self.logger.log('Retrying upload of {}.'.format(fullPath),
                            'warning')
            return self.uploadFileToCloudStorage(fullPath = fullPath,
                                                 retryCount = retryCount - 1)


    def __retrieveCredentials(self):
        """
        Perform authorization at the server.

        Credentials are loaded into the object attribute googleAPICredentials.
        """

        flow = OAuth2WebServerFlow(self.clientID, self.clientSecret,
                                   self.oauthScope, self.oauthConsent)
        authorize_url = flow.step1_get_authorize_url()
        print 'Go to the following link in your browser: ' + authorize_url
        code = raw_input('Enter verification code: ').strip()
        self.googleAPICredentials = flow.step2_exchange(code)

        print "refresh_token = {}".format(
            self.googleAPICredentials.refresh_token)
        print "expiry = {}".format(self.googleAPICredentials.token_expiry)


    def freeSpace(self):
        """
        Get free space from the drive service.
        :returns: Int of free space (in bytes) on the drive service.
        """
        aboutData = self.driveService.about().get().execute()
        return int(aboutData['quotaBytesTotal']) - int(
            aboutData['quotaBytesUsed']) - int(
            aboutData['quotaBytesUsedInTrash'])


    def deleteFile(self, fileID = ''):
        """
        Delete the file with ID fileID.
        :param fileID: String of a Google API file ID.
        """

        if not len(fileID) > 0:
            raise Exception("File ID has not been given.")

        self.logger.log(
            'Deleting file with file ID {} and name {}.'.format(fileID,
                                                                self.filenameForFileID(
                                                                    fileID)),
            'debug')

        try:
            # Writing the fileId arg name is required here.
            self.driveService.files().delete(fileId = fileID).execute()

        except errors.HttpError as error:
            self.logger.log('Exception while deleting: {}'.format(error),
                            'error')


    def deleteOutdatedFiles(self, maxAge = datetime.timedelta(weeks = 9999999)):
        """
        Remove outdated files from cloud storage.

        :param maxAge: datetime.timedelta of the age at or beyond which a
        file is considered outdated and eligible for deletion.
        :returns: Int count of items submitted for deletion.
        """

        # @todo Return count of actual successfully deleted files.

        outdated = self.outdatedFiles(maxAge)

        """:type : dict"""
        for f in outdated:
            self.deleteFile(f['id'])

        return len(outdated)


    def outdatedFiles(self,
                      daysBeforeOutdated = datetime.timedelta(days = 9999999)):
        """
        Return the files in the cloud that are outdated, where a file is
        outdated if its age is greater than or equal to daysBeforeOutdated.

        Note: When t1 is the same day as t2, the timedelta comes back as -1.
        Not sure why this isn't represented as zero. Perhaps to avoid a false
        evaluation of a predicate on a tdelta.

        :param daysBeforeOutdated: datetime.timedelta giving the age at or
        beyond which a file is considered outdated.
        :return: List of cloud file items that are outdated.
        """

        t1 = lambda x: datetime.datetime.strptime(x['createdDate'],
                                                  "%Y-%m-%dT%H:%M:%S.%fZ")
        t2 = datetime.datetime.now()

        return filter(lambda x: t2 - t1(x) >= daysBeforeOutdated,
                      self.cloudFiles['items'])


    def sendNotificationOfFiles(self):
        """
        Provide a notification that lists the export files along with sharing
        links.
        """

        pass


    def sendDownloadableFiles(self):
        """
        Send available files via HTTP POST.
        :returns: None
        """

        myPath = '{}/{}'.format(self.exportTempWorkPath,
                                'list-of-downloadable-files.txt')

        fp = open(myPath, 'wb')

        output = StringIO()
        output.write(self.markdownListOfDownloadableFiles())

        fp.write(output.getvalue())
        fp.close()

        headers = {'User-Agent': self.postAgent, 'Content-Type': 'text/html'}
        try:
            r = requests.post(self.configer.configOptionValue('Export',
                                                              'export_list_post_url'),
                              output.getvalue(), headers = headers)
            print 'text: {}'.format(r.text)
        except requests.adapters.SSLError as error:
            # @todo Implement alternative verification.
            self.logger.log('SSL error: {}'.format(error), 'error')

        output.close()


    def metadataOfFileID(self, fileID = ''):
        """
        :param fileID: String of a file ID in the cloud.
        :return: Tuple of metadata (name, url, timestamp, size) for a given
        file ID.
        """
        item = [i for i in self.cloudFiles['items'] if i['id'] == fileID][0]
        return (item[u'originalFilename'], item[u'webContentLink'],
                item[u'createdDate'], item[u'fileSize'])


    def listOfDownloadableFiles(self):
        """
        Create a list of downloadable files.
        :returns: List of dicts of files that are downloadable from the cloud.
        """

        files = []
        for i in reversed(sorted(self.cloudFiles['items'],
                                 key = lambda k: k['createdDate'])):
            item = dict()
            item['title'] = i['title']
            item['webContentLink'] = i['webContentLink']
            item['id'] = i['id']
            item['createdDate'] = i['createdDate']
            item['fileSize'] = i['fileSize']
            files.append(item)
        return files


    def markdownListOfDownloadableFiles(self):
        """
        Generate content containing a list of downloadable files in Markdown
        format.

        :returns: String content in Markdown format.
        """

        content = "||*Name*||*Created*||*Size*||\n"
        for i in self.listOfDownloadableFiles():
            content += "||[`{}`]({})".format(i['title'], i['webContentLink'])
            content += "||`{}`".format(i['createdDate'])
            content += "||`{} B`||".format(int(i['fileSize']))
            content += '\n'

        # self.logger.log('content: {}'.format(content))
        return content
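
    # Example of one generated row (illustrative comment; the filename, link
    # and values are hypothetical):
    # ||[`dbexport.sql.gz`](https://example.com/download)||`2014-01-01T00:00:00.000Z`||`1024 B`||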


    def plaintextListOfDownloadableFiles(self):
        """
        Generate content containing a list of downloadable files in plaintext
        format.

        :returns: String content as plaintext.
        """
        content = ''
        includeLink = False
        for i in reversed(sorted(self.cloudFiles['items'],
                                 key = lambda k: k['createdDate'])):
            if includeLink:
                content += "{}, {}, {}, {} B\n".format(i['title'],
                                                       i['webContentLink'],
                                                       i['createdDate'],
                                                       int(i['fileSize']))
            else:
                content += "{}, {}, {} B\n".format(i['title'], i['createdDate'],
                                                   int(i['fileSize']))

        return content


    def logSuccessfulExport(self, name = '', url = '', datetime = 0, size = 0):
        """
        When an export has been successful, log information about the export
        to the database.

        The items to log include:
        * filename
        * URL
        * timestamp
        * filesize

        :param name: String
        :param url: String
        :param datetime: Timestamp string for the export, or 0 to record the
        epoch.
        :param size: Int
        :return: True if no errors occurred, else False.
        """

        def exportHistoryColumns():
            return ['name', 'url', 'timestamp', 'size']

        timestamp = lambda dt: 'to_timestamp(0)' if dt == 0 else \
            "timestamp '{}'".format(dt)

        sql = 'INSERT INTO "{0}" ({1}) VALUES ({2}, {3}, {4}, {5})'.format(
            self.configer.configOptionValue('Export', 'export_history_table'),
            ','.join(exportHistoryColumns()), "'" + name + "'", "'" + url + "'",
            timestamp(datetime), size)

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        result = dbUtil.executeSQL(cursor, sql, exitOnFail = False)
        conn.commit()
        return result
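
    # Example of the generated SQL (illustrative comment; the table name comes
    # from the Export/export_history_table config option and the values are
    # hypothetical):
    # INSERT INTO "ExportHistory" (name,url,timestamp,size) VALUES
    # ('dbexport.sql.gz', 'https://example.com/download',
    #  timestamp '2014-01-01T00:00:00.000Z', 1024)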


    def sendExportSummary(self, summary = ''):
        """
        Send a summary of exports via email to a preconfigured list of
        recipients.
        :param summary: String of summary content.
        :return: None
        """
        try:
            if self.notifier.sendNotificationEmail(summary, testing = False):
                self.notifier.recordNotificationEvent(
                    types = MSGNotificationHistoryTypes,
                    noticeType = MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        except Exception as detail:
            self.logger.log('Exception occurred: {}'.format(detail), 'ERROR')


    def currentExportSummary(self):
        """
        Current summary of exports since the last summary report time.

        Summaries are reported with identifier MSG_EXPORT_SUMMARY in the
        NotificationHistory.

        Includes:
        * Number of databases exported
        * Total number of files in the cloud.
        * A report of available storage capacity.
        * A list of available DBs.
        * A link where exports can be accessed.

        :return: String of summary text.
        """
        availableFilesURL = self.configer.configOptionValue('Export',
                                                            'export_list_url')
        lastReportDate = self.notifier.lastReportDate(
            types = MSGNotificationHistoryTypes,
            noticeType = MSGNotificationHistoryTypes.MSG_EXPORT_SUMMARY)
        content = 'Cloud Export Summary:\n\n'
        content += 'Last report date: {}\n'.format(lastReportDate)

        # @TO BE REVIEWED: Verify time zone adjustment.
        content += '{} databases have been exported since the last report ' \
                   'date.\n'.format(self.countOfDBExports(
            lastReportDate + datetime.timedelta(
                hours = 10)) if lastReportDate else self.countOfDBExports())

        content += '{} B free space is available.\n'.format(self.freeSpace())
        content += '\nCurrently available DBs:\n'
        content += self.plaintextListOfDownloadableFiles()
        content += '\n{} files can be accessed through Google Drive (' \
                   'https://drive.google.com) or at {}.'.format(
            self.countOfCloudFiles(), availableFilesURL)

        return content


    def countOfDBExports(self, since = None):
        """
        :param since: datetime indicating last export datetime.
        :return: Int of count of exports.
        """
        myDatetime = lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M')
        if not since:
            since = myDatetime('1900-01-01 00:00')
        self.logger.log(since.strftime('%Y-%m-%d %H:%M'), 'DEBUG')

        sql = 'SELECT COUNT("public"."ExportHistory"."timestamp") FROM ' \
              '"public"."ExportHistory" WHERE "timestamp" > \'{}\''.format(
            since.strftime('%Y-%m-%d %H:%M'))

        conn = MSGDBConnector().connectDB()
        cursor = conn.cursor()
        dbUtil = MSGDBUtil()
        rows = None
        if dbUtil.executeSQL(cursor, sql, exitOnFail = False):
            rows = cursor.fetchall()
        assert rows is not None and len(rows) == 1, 'Invalid return value.'
        return rows[0][0]


    def countOfCloudFiles(self):
        """
        :return: Int count of files stored in the cloud.
        """
        return len(self.cloudFiles['items'])


    def __verifyMD5Sum(self, localFilePath, remoteFileID):
        """
        Verify that the local MD5 sum matches the MD5 sum for the remote file
        corresponding to an ID.

        This verifies that the uploaded file matches the local compressed
        export file.

        :param localFilePath: String of the full path of the local file.
        :param remoteFileID: String of the cloud ID for the remote file.
        :returns: Boolean True if the MD5 sums match, otherwise, False.
        """

        self.logger.log('remote file ID: {}'.format(remoteFileID))
        self.logger.log('local file path: {}'.format(localFilePath))

        # Get the md5sum for the local file.
        f = open(localFilePath, mode = 'rb')
        fContent = hashlib.md5()
        for buf in iter(partial(f.read, 128), b''):
            fContent.update(buf)
        localMD5Sum = fContent.hexdigest()
        f.close()

        self.logger.log('local md5: {}'.format(localMD5Sum), 'DEBUG')

        def verifyFile():
            # Get the MD5 sum for the remote file.
            for item in self.cloudFiles['items']:
                if (item['id'] == remoteFileID):
                    self.logger.log(
                        'remote md5: {}'.format(item['md5Checksum']), 'DEBUG')
                    if localMD5Sum == item['md5Checksum']:
                        return True
                    else:
                        return False

        try:
            if verifyFile():
                return True
            else:
                return False

        except errors.HttpError as detail:
            self.logger.log('HTTP error during MD5 verification.', 'error')

            time.sleep(10)

            if verifyFile():
                return True
            else:
                return False


    def fileIDForFileName(self, filename):
        """
        Get the file ID for the given filename.

        Multiple cloud files may share a filename, but only the ID of a
        single matching file is returned.

        This can be called repeatedly to obtain all of the file IDs for a
        given filename.

        :param filename: String of the filename for which to retrieve the ID.
        :returns: String of a cloud file ID or None if no match.
        """
        fileIDList = filter(lambda x: x['originalFilename'] == filename,
                            self.cloudFiles['items'])
        return fileIDList[0]['id'] if len(fileIDList) > 0 else None


    def filenameForFileID(self, fileID = ''):
        """
        :param fileID: String of cloud-based file ID.
        :return: String of filename for a given file ID.
        """
        return filter(lambda x: x['id'] == fileID, self.cloudFiles['items'])[0][
            'originalFilename']


    def addReaders(self, fileID = None, emailAddressList = None,
                   retryCount = 0):
        """
        Add reader permission to an export file that has been uploaded to the
        cloud for the given list of email addresses.

        Email notification is suppressed by default.

        :param fileID: String of the cloud file ID to be processed.
        :param emailAddressList: List of email addresses.
        :param retryCount: Int count of remaining retry attempts.
        :returns: Boolean True if successful, otherwise False.
        """
        # @todo Provide support for retry count
        success = True

        self.logger.log('file id: {}'.format(fileID))
        self.logger.log('address list: {}'.format(emailAddressList))

        for addr in emailAddressList:
            permission = {'value': addr, 'type': 'user', 'role': 'reader'}

            if fileID:
                try:
                    resp = self.driveService.permissions().insert(
                        fileId = fileID, sendNotificationEmails = False,
                        body = permission).execute()
                    self.logger.log(
                        'Reader permission added for {}.'.format(addr))
                except errors.HttpError as error:
                    self.logger.log('An error occurred: {}'.format(error))
                    success = False

        if not success and retryCount <= 0:
            return False
        elif success:
            return True
        else:
            time.sleep(self.retryDelay)
            self.logger.log('Retrying adding readers for ID {}.'.format(fileID),
                            'warning')
            return self.addReaders(fileID = fileID,
                                   emailAddressList = emailAddressList,
                                   retryCount = retryCount - 1)


class MSGDataAggregator(object):
    """
    Use for continuous data aggregation of diverse data types relevant to the
    Maui Smart Grid project.

    Four data types are supported:

    1. Irradiance
    2. Temperature/Humidity (weather)
    3. Circuit
    4. eGauge

    The general data form conforms to

    1. timestamp, subkey_id, val1, val2, val3, ...
    2. timestamp, val1, val2, val3, ...

    Case (2) is handled within the same space as (1) by testing for the
    existence of subkeys.

    Current aggregation consists of averaging over **15-min intervals**.

    Aggregation is performed in-memory and saved to the DB. The time range is
    delimited by start date and end date where the values are included in the
    range. The timestamps for aggregation intervals are the last timestamp in a
    respective series.

    * Aggregation subkeys are values such as eGauge IDs or circuit numbers.

    Aggregation is being implemented externally for performance and flexibility
    advantages over alternative approaches such as creating a view. It may be
    rolled into an internal function at a future time if that proves to be
    beneficial.

    Usage:

        from msg_data_aggregator import MSGDataAggregator
        aggregator = MSGDataAggregator()

    API:

        aggregateAllData(dataType = dataType)

        aggregateNewData(dataType = dataType)

    """

    def __init__(self, exitOnError=True, commitOnEveryInsert=False, testing=False):
        """
        Constructor.

        :param testing: if True, the testing DB will be connected instead of
        the production DB.
        """

        self.logger = SEKLogger(__name__, "info")
        self.configer = MSGConfiger()
        self.conn = MSGDBConnector().connectDB()
        self.cursor = self.conn.cursor()
        self.dbUtil = MSGDBUtil()
        self.notifier = MSGNotifier()
        self.mathUtil = MSGMathUtil()
        self.timeUtil = MSGTimeUtil()
        self.nextMinuteCrossing = {}
        self.nextMinuteCrossingWithoutSubkeys = None
        self.exitOnError = exitOnError
        self.commitOnEveryInsert = commitOnEveryInsert
        section = "Aggregation"
        tableList = [
            "irradiance",
            "agg_irradiance",
            "weather",
            "agg_weather",
            "circuit",
            "agg_circuit",
            "egauge",
            "agg_egauge",
        ]
        self.dataParams = {
            "weather": ("agg_weather", "timestamp", ""),
            "egauge": ("agg_egauge", "datetime", "egauge_id"),
            "circuit": ("agg_circuit", "timestamp", "circuit"),
            "irradiance": ("agg_irradiance", "timestamp", "sensor_id"),
        }
        self.columns = {}

        # tables[datatype] gives the table name for datatype.
        self.tables = {t: self.configer.configOptionValue(section, "{}_table".format(t)) for t in tableList}

        for t in self.tables.keys():
            self.logger.log("t:{}".format(t), "DEBUG")
            try:
                self.columns[t] = self.dbUtil.columnsString(self.cursor, self.tables[t])
            except TypeError as error:
                self.logger.log("Ignoring missing table: Error is {}.".format(error), "error")

    def existingIntervals(self, aggDataType="", timeColumnName=""):
        """
        Retrieve the existing aggregation intervals for the given data type.

        :param aggDataType: string
        :param timeColumnName: string
        :return: List of intervals.
        """

        return [
            x[0]
            for x in self.rows(
                """SELECT {0} from \"{1}\" ORDER BY {2}""".format(
                    timeColumnName, self.tables[aggDataType], timeColumnName
                )
            )
        ]

    def unaggregatedIntervalCount(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Return count of unaggregated intervals for a given data type.
        :param dataType:
        :param aggDataType:
        :param timeColumnName:
        :param idColumnName:
        :return: int
        """

        return len(self.unaggregatedEndpoints(dataType, aggDataType, timeColumnName, idColumnName))

    def lastAggregationEndpoint(self, aggDataType="", timeColumnName=""):
        """
        Last aggregation endpoint for a given datatype.

        :param dataType:
        :param timeColumnName:
        :return:
        """

        return self.existingIntervals(aggDataType=aggDataType, timeColumnName=timeColumnName)[-1]

    def unaggregatedEndpoints(self, dataType="", aggDataType="", timeColumnName="", idColumnName=""):
        """
        Sorted (ascending) endpoints and their IDs, if available,
        for unaggregated intervals since the last aggregation endpoint for a
        given data type.

        This has a problem where an endpoint at 23:45:04 will be returned as
        23:45:00. This makes the return value incorrect for raw data types
        having readings at sub-minute intervals such as data for circuit,
        irradiance and weather. This condition does not affect correct
        aggregation. Only the definition of the return value is wrong.

        :param dataType: string
        :param aggDataType: string
        :param timeColumnName: string
        :param idColumnName: string
        :return: list of datetimes.
        """

        if idColumnName != "":
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: id col
            # 4: last aggregated time
            sql = (
                'SELECT "{0}".{2}, "{0}".{3} FROM "{0}" LEFT JOIN "{1}" ON '
                '"{0}".{2} = "{1}".{2} AND "{0}".{3} = "{1}".{3} WHERE "{'
                '1}".{2} IS NULL AND "{0}".{2} > \'{4}\' ORDER BY {2} ASC, '
                "{3} ASC"
            )
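            # Added note: this SQL is a LEFT JOIN anti-join. Raw rows whose
            # (time, id) pair has no counterpart in the aggregated table come
            # back with the aggregated-side time column NULL, so only
            # endpoints newer than the last aggregated time that have not yet
            # been aggregated are returned.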

            self.logger.log("last agg endpoint: {}".format(self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            # The id column value is available in the tuple returned by
            # groupby but is not being used here.

            # @todo Exclude last endpoint if it is equal to the last
            # aggregation endpoint.
            #
            # The minute position filtering may be including the last
            # endpoint incorrectly because there are readings occurring
            # within the same minute as the final endpoint, e.g. 23:45:04,
            # 23:45:08, etc.
            #
            # This is not a problem with eGauge data because its reading
            # intervals fall on every minute at zero seconds.

            return map(
                lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0),
                [
                    k
                    for k, v in groupby(
                        map(
                            lambda y: y[0].timetuple()[0:5],
                            filter(
                                lambda x: x[0].timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0,
                                [
                                    (x[0], x[1])
                                    for x in self.rows(
                                        sql.format(
                                            self.tables[dataType],
                                            self.tables[aggDataType],
                                            timeColumnName,
                                            idColumnName,
                                            self.lastAggregationEndpoint(aggDataType, timeColumnName),
                                        )
                                    )
                                ],
                            ),
                        )
                    )
                ],
            )
        else:
            # Key:
            # 0: raw
            # 1: agg
            # 2: time col
            # 3: last aggregated time
            sql = (
                'SELECT "{0}".{2} FROM "{0}" LEFT JOIN "{1}" ON "{0}".{2}='
                '"{1}".{2} WHERE "{1}".{2} IS NULL AND "{0}".{2} > \'{3}\' '
                "ORDER BY {2} ASC"
            )

            self.logger.log("last agg endpoint: {}".format(self.lastAggregationEndpoint(aggDataType, timeColumnName)))

            return map(
                lambda x: datetime(x[0], x[1], x[2], x[3], x[4], 0),
                [
                    k
                    for k, v in groupby(
                        map(
                            lambda y: y.timetuple()[0:5],
                            filter(
                                lambda x: x.timetuple()[MINUTE_POSITION] % INTERVAL_DURATION == 0,
                                [
                                    (x[0])
                                    for x in self.rows(
                                        sql.format(
                                            self.tables[dataType],
                                            self.tables[aggDataType],
                                            timeColumnName,
                                            self.lastAggregationEndpoint(aggDataType, timeColumnName),
                                        )
                                    )
                                ],
                            ),
                        )
                    )
                ],
            )

    def intervalCrossed(self, minute=None, subkey=None):
        """
        Determine interval crossing. Interval crossings occur at minutes 0,
        15, 30 and 45. The interval size is determined by MECO source data.

        :param minute: The integer value of the minute.
        :param subkey: The name for the subkey used for aggregation.
        :returns: True if an interval was crossed, False otherwise.
        """

        if not minute and minute != 0:
            raise Exception("Minute not defined.")

        intervalSize = 15
        first = 0
        last = 60

        if subkey is not None:
            if (
                minute >= self.nextMinuteCrossing[subkey]
                and minute <= last
                and self.nextMinuteCrossing[subkey] != first
            ):
                self.nextMinuteCrossing[subkey] += intervalSize
                if self.nextMinuteCrossing[subkey] >= last:
                    self.nextMinuteCrossing[subkey] = first
                self.logger.log("minute crossed at #1.", "debug")
                return True
            elif self.nextMinuteCrossing[subkey] == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossing[subkey] = intervalSize
                self.logger.log("minute crossed at #2.", "debug")
                return True
            return False
        else:
            if (
                minute >= self.nextMinuteCrossingWithoutSubkeys
                and minute <= last
                and self.nextMinuteCrossingWithoutSubkeys != first
            ):
                self.nextMinuteCrossingWithoutSubkeys += intervalSize
                if self.nextMinuteCrossingWithoutSubkeys >= last:
                    self.nextMinuteCrossingWithoutSubkeys = first
                self.logger.log("minute crossed at #3.", "debug")
                return True
            elif self.nextMinuteCrossingWithoutSubkeys == first and minute >= first and minute <= intervalSize:
                self.nextMinuteCrossingWithoutSubkeys = intervalSize
                self.logger.log("minute crossed at #4.", "debug")
                return True
            return False
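
    # Worked example (added comment): if nextMinuteCrossing for a subkey is 15
    # and a reading arrives at minute 17, the interval is crossed and the next
    # crossing becomes 30; later, with the crossing at 45, a reading at minute
    # 46 is detected as a crossing and the next crossing wraps from 60 back
    # to 0.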

    def rows(self, sql):
        """
        Rows from a SQL fetch.

        :param sql: Command to be executed.
        :returns: DB result set.
        """

        self.logger.log("sql: {}".format(sql), "debug")
        self.dbUtil.executeSQL(self.cursor, sql)
        return self.cursor.fetchall()

    def rawData(self, dataType="", orderBy=None, timestampCol="", startDate="", endDate=""):
        """
        Raw data to be aggregated.

        :param dataType: string
        :param orderBy: list
        :param timestampCol: string
        :param startDate: string
        :param endDate: string
        :returns: DB rows.
        """

        # @todo Validate args.

        orderBy = filter(None, orderBy)

        return self.rows(
            """SELECT {} FROM "{}" WHERE {} BETWEEN '{}' AND
        '{}' ORDER BY {}""".format(
                self.columns[dataType], self.tables[dataType], timestampCol, startDate, endDate, ",".join(orderBy)
            )
        )

    def subkeys(self, dataType="", timestampCol="", subkeyCol="", startDate="", endDate=""):
        """
        The distinct subkeys for a given data type within a time range.

        Subkeys are fields such as egauge_id in eGauge data or sensor_id in
        irradiance data.

        :param dataType: string
        :param timestampCol: string
        :param subkeyCol: string
        :param startDate: string
        :param endDate: string
        :returns: List of subkeys
        """

        return [
            sk[0]
            for sk in self.rows(
                """SELECT DISTINCT({}) FROM "{}"
        WHERE {} BETWEEN '{}' AND '{}'
            ORDER BY {}""".format(
                    subkeyCol, self.tables[dataType], timestampCol, startDate, endDate, subkeyCol
                )
            )
        ]

    def insertAggregatedData(self, agg=None):
        """
        :param agg: MSGAggregatedData
        :return: None
        """

        if not agg.columns:
            raise Exception("agg columns not defined.")
        if not agg.data:
            raise Exception("agg data not defined.")

        self.logger.log("agg data: {}".format(agg.data))
        self.logger.log("agg data type: {}".format(type(agg.data)))

        def __insertData(values=""):
            """
            Perform insert of data to the database using the given values.
            :param values: String containing values to be inserted.
            :return Nothing.
            """
            sql = 'INSERT INTO "{0}" ({1}) VALUES( {2})'.format(
                self.tables[agg.aggregationType], ",".join(agg.columns), values
            )
            self.logger.log("sql: {}".format(sql), "debug")
            success = self.dbUtil.executeSQL(self.cursor, sql, exitOnFail=self.exitOnError)

            # Used for a special case where data is reloaded.
            if self.commitOnEveryInsert:
                self.conn.commit()
            if not success and self.exitOnError:
                raise Exception("Failure during aggregated data insert.")

        for row in agg.data:
            if type(row) == type({}):
                # self.logger.log('row=%s' % row, 'debug')
                # self.logger.log('row type: %s' % type(row))

                for key in row.keys():
                    values = ""
                    valCnt = 0
                    for val in row[key]:
                        if val == "NULL":
                            values += val
                        elif type(val) == type(""):
                            values += "'" + val.strip() + "'"
                        elif isinstance(val, datetime):
                            values += "'" + val.isoformat() + "'"
                        elif type(val) == type(0):
                            values += str(val)
                        elif type(val) == type(0.0):
                            values += str(val)
                        else:
                            values += val
                        if valCnt < len(agg.columns) - 1:
                            values += ","
                        valCnt += 1
                    __insertData(values=values)

            elif type(row) == type([]):
                values = ""
                valCnt = 0
                for val in row:
                    if val == "NULL":
                        values += val
                    elif type(val) == type(""):
                        values += "'" + val.strip() + "'"
                    elif isinstance(val, datetime):
                        values += "'" + val.isoformat() + "'"
                    elif type(val) == type(0):
                        values += str(val)
                    elif type(val) == type(0.0):
                        values += str(val)
                    else:
                        values += val
                    if valCnt < len(agg.columns) - 1:
                        values += ","
                    valCnt += 1
                __insertData(values=values)
            else:
                self.logger.log("row = {}".format(row), "error")
                raise Exception("Row type not matched.")

        # End for row.
        self.conn.commit()
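
    # Example of a generated statement (added comment; the table, columns and
    # values are hypothetical):
    # INSERT INTO "agg_egauge" (datetime,egauge_id,energy) VALUES(
    # '2014-01-01T00:15:00',123,1.5)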

    def intervalAverages(self, sums, cnts, timestamp, timestampIndex, subkeyIndex=None, subkey=None):
        """
        Aggregates all data for the current interval for the given subkey.

        For the case where there are no subkeys, subkeyIndex and subkey
        should be None.

        :param sums: list
        :param cnts: list
        :param timestamp: datetime
        :param timestampIndex: int
        :param subkeyIndex: int
        :param subkey: string
        :returns: Averaged data as a dict with form {subkey:data}
        """

        if subkey is not None:
            myAvgs = {}
            reportedAgg = False
            myAvgs[subkey] = []
            sumIndex = 0

            self.logger.log("key: {}".format(subkey), "debug")
            # Iterate over sums.
            for s in sums[subkey]:
                if sumIndex == timestampIndex:
                    myAvgs[subkey].append(timestamp)
                elif sumIndex == subkeyIndex:
                    myAvgs[subkey].append(subkey)
                else:
                    if cnts[subkey][sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log("Aggregating {} rows of data.".format(cnts[subkey][sumIndex]), "debug")
                            reportedAgg = True

                        myAvgs[subkey].append(s / cnts[subkey][sumIndex])
                    else:
                        myAvgs[subkey].append("NULL")
                sumIndex += 1
            return myAvgs
        else:
            myAvgs = []
            reportedAgg = False
            sumIndex = 0
            for s in sums:
                if sumIndex == timestampIndex:
                    myAvgs.append(timestamp)
                else:
                    if cnts[sumIndex] != 0:
                        if not reportedAgg:
                            self.logger.log("Aggregating {} rows of data.".format(cnts[sumIndex]), "debug")
                            reportedAgg = True
                        myAvgs.append(s / cnts[sumIndex])
                    else:
                        myAvgs.append("NULL")
                sumIndex += 1
            return myAvgs
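
    # Worked example for the no-subkey case (added comment): with
    # sums = [0, 30.0, 0.0], cnts = [0, 3, 0], timestampIndex = 0 and
    # timestamp = 2014-01-01 00:15:00, the result is
    # [datetime(2014, 1, 1, 0, 15), 10.0, 'NULL'].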

    def dataParameters(self, dataType=""):
        """
        Parameters for a given data type.
        :param dataType: string
        :return: (aggType, timeColName, subkeyColName)
        """
        try:
            assert len(self.dataParams[dataType]) == 3
            return self.dataParams[dataType]
        except (KeyError, AssertionError):
            self.logger.log("Unmatched data type {}.".format(dataType))

    def aggregateAllData(self, dataType=""):
        """
        Convenience method for aggregating all data for a given data type.
        Data is inserted to individual aggregated data tables.
        :param dataType: String in the list of raw data types.
        :return: Nothing.
        """
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        for start, end in self.monthStartsAndEnds(timeColumnName=timeColName, dataType=dataType):
            self.logger.log("start, end: {}, {}".format(start, end))
            aggData = self.aggregatedData(
                dataType=dataType,
                aggregationType=aggType,
                timeColumnName=timeColName,
                subkeyColumnName=subkeyColName,
                startDate=start.strftime("%Y-%m-%d %H:%M:%S"),
                endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
            )
            self.insertAggregatedData(agg=aggData)
            for row in aggData.data:
                self.logger.log("aggData row: {}".format(row))

    def aggregateNewData(self, dataType=""):
        """
        Convenience method for aggregating new data.

        :param dataType:
        :return: dict of {dataType: count of aggregation endpoints}
        """

        # The new aggregation starting point is equal to the last aggregation
        # endpoint up to the last unaggregated endpoint.

        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)

        (end, start) = self.lastUnaggregatedAndAggregatedEndpoints(dataType).items()[0][1]

        self.logger.log(
            "datatype: {}; start, end: {}, {}; end type: {}".format(dataType, start, end, type(end)), "critical"
        )

        if type(end) == type(None):
            # No available unaggregated endpoints results in an empty list
            # for type egauge. The same empty result is not produced for the
            # other types because their fractional-minute readings are not
            # handled completely, but this method still works for them
            # without problem.
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        if self.incrementEndpoint(start) >= end:
            self.logger.log("Nothing to aggregate.")
            return {dataType: 0}

        aggData = self.aggregatedData(
            dataType=dataType,
            aggregationType=aggType,
            timeColumnName=timeColName,
            subkeyColumnName=subkeyColName,
            startDate=self.incrementEndpoint(start).strftime("%Y-%m-%d %H:%M:%S"),
            endDate=end.strftime("%Y-%m-%d %H:%M:%S"),
        )
        self.insertAggregatedData(agg=aggData)
        for row in aggData.data:
            self.logger.log("aggData row: {}".format(row))

        self.logger.log("{} rows aggregated for {}.".format(len(aggData.data), dataType))
        return {dataType: len(aggData.data)}

    def incrementEndpoint(self, endpoint=None):
        """
        Increment an endpoint by one interval where endpoints are the final
        timestamp in an aggregation interval.
        :param endpoint: the endpoint to be incremented.
        :return: datetime object that is the given endpoint + a predefined
        amount of minutes.
        """
        plusOneInterval = relativedelta(minutes=15)
        return endpoint + plusOneInterval
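
    # Example (added comment): incrementEndpoint(datetime(2014, 1, 1, 23, 45))
    # returns datetime(2014, 1, 2, 0, 0), i.e. the endpoint plus the
    # 15-minute aggregation interval.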

    def lastUnaggregatedAndAggregatedEndpoints(self, dataType=""):
        """
        Return the endpoints for the given data type in the form

        {datatype: (last unaggregated endpoint, last aggregated endpoint)}.
        :param dataType:
        :return: dict with tuple.
        """
        self.logger.log("datatype {}".format(dataType))
        (aggType, timeColName, subkeyColName) = self.dataParameters(dataType)
        self.logger.log("subkey colname {}".format(subkeyColName))

        unAggregatedEndpoints = self.unaggregatedEndpoints(
            dataType=dataType, aggDataType=aggType, timeColumnName=timeColName, idColumnName=subkeyColName
        )

        self.logger.log("unagg endpoints: {}".format(unAggregatedEndpoints))
        return {
            dataType: (
                unAggregatedEndpoints[-1] if unAggregatedEndpoints != [] else None,
                self.lastAggregationEndpoint(aggDataType=aggType, timeColumnName=timeColName),
            )
        }

    def aggregatedVsNewData(self):
        """
        Convenience method.
        :return: dict of tuples containing {datatype:(last raw datetime,
        last agg datetime)}
        """
        return {
            x.keys()[0]: (x.values()[0])
            for x in map(self.lastUnaggregatedAndAggregatedEndpoints, [k for k in self.dataParams])
        }

    def monthStartsAndEnds(self, timeColumnName="", dataType=""):
        """
        Return the first date and last date for the given **raw** data type
        for each month in the data's entire time range.

        The end date is incremented by one aggregation period to account for
        the data obtained at time 00:00.

        :param timeColumnName: string
        :param dataType: string
        :return: List of tuples.
        """

        self.logger.log("datatype {}".format(dataType), "debug")
        (start, end) = self.rows(
            """SELECT MIN({}), MAX({}) FROM \"{}\"""".format(timeColumnName, timeColumnName, self.tables[dataType])
        )[0]
        self.logger.log("start {}".format(start))
        self.logger.log("end {}".format(end))

        # End time needs transforming in split dates to extend the end of the
        # day to 23:59:59.

        splitDates = self.timeUtil.splitDates(start, end)

        startEndDatesTransform = []
        i = 0
        while i < len(splitDates):
            startEndDatesTransform.append(
                (
                    splitDates[i][0],
                    self.incrementEndpoint(
                        datetime(
                            splitDates[i][1].timetuple()[0],
                            splitDates[i][1].timetuple()[1],
                            splitDates[i][1].timetuple()[2],
                            23,
                            59,
                            59,
                        )
                    ),
                )
            )
            i += 1

        return startEndDatesTransform
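
    # Example of one returned tuple (added comment; dates hypothetical): for a
    # month whose split dates are (2014-01-01, 2014-01-31), the transformed
    # pair is (2014-01-01, datetime(2014, 1, 31, 23, 59, 59) + 15 minutes),
    # i.e. (2014-01-01, 2014-02-01 00:14:59).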

    def aggregatedData(
        self, dataType="", aggregationType="", timeColumnName="", subkeyColumnName="", startDate="", endDate=""
    ):
        """
        ***********************************************************************
        Provide aggregated data.
        ***********************************************************************

        Start and end dates are used to calculate interval crossings.

        :param dataType: String
        :param aggregationType: String
        :param timeColumnName: String
        :param subkeyColumnName: String
        :param startDate: String
        :param endDate: String
        :returns: MSGAggregatedData
        """

        aggData = []
        ci = lambda col_name: self.columns[dataType].split(",").index(col_name)

        rowCnt = 0

        mySubkeys = []
        if subkeyColumnName:
            mySubkeys = self.subkeys(
                dataType=dataType,
                timestampCol=timeColumnName,
                subkeyCol=subkeyColumnName,
                startDate=startDate,
                endDate=endDate,
            )

        self.logger.log("subkeys: {}".format(mySubkeys), "debug")

        def __initSumAndCount(subkey=None, sums=None, cnts=None):
            """
            Initialize the sum and cnt data structures.
            :param subkey: string
            :param sums: list | dict | None
            :param cnts: list | dict | None
            """

            if not sums and not cnts:
                sums = {}
                cnts = {}

            if not mySubkeys:
                sums = []
                cnts = []
                for i in range(len(self.columns[dataType].split(","))):
                    sums.append(0)
                    cnts.append(0)
            else:
                if not subkey:
                    for i in range(len(self.columns[dataType].split(","))):
                        for k in mySubkeys:
                            if k not in sums.keys():
                                sums[k] = []
                                cnts[k] = []
                            sums[k].append(0)
                            cnts[k].append(0)
                else:
                    sums[subkey] = []
                    for i in range(len(self.columns[dataType].split(","))):
                        sums[subkey].append(0)
                    cnts[subkey] = []
                    for i in range(len(self.columns[dataType].split(","))):
                        cnts[subkey].append(0)

            return (sums, cnts)

        (sum, cnt) = __initSumAndCount()

        def __initIntervalCrossings():
            """
            Perform initialization of the interval crossings used to
            determine when interval crossings occur.
            :returns: None
            """

            subkeysToCheck = copy.copy(mySubkeys)
            self.logger.log("subkeys to check: {}".format(subkeysToCheck), "debug")

            if mySubkeys:
                for row in self.rawData(
                    dataType=dataType,
                    orderBy=[timeColumnName, subkeyColumnName],
                    timestampCol=timeColumnName,
                    startDate=startDate,
                    endDate=endDate,
                ):

                    # @CRITICAL: Exit after every subkey has been visited.
                    # This scans the raw data until each subkey is encountered
                    # ONCE and then exits.
                    if subkeysToCheck != []:
                        if row[ci(subkeyColumnName)] in subkeysToCheck:
                            subkeysToCheck.remove(row[ci(subkeyColumnName)])
                        minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                        if minute <= 15:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 15
                        elif minute <= 30:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 30
                        elif minute <= 45:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 45
                        elif minute == 0 or minute <= 59:
                            self.nextMinuteCrossing[row[ci(subkeyColumnName)]] = 0
                        else:
                            raise Exception("Unable to determine next minute crossing")
                        self.logger.log(
                            "next min crossing for {} = {}".format(
                                row[ci(subkeyColumnName)], self.nextMinuteCrossing[row[ci(subkeyColumnName)]]
                            ),
                            "debug",
                        )
                    else:
                        break

            else:
                # Non-subkey case e.g. weather data.
                rowCnt = 0
                # @todo Optimize by querying only the first row.
                for row in self.rawData(
                    dataType=dataType,
                    orderBy=[timeColumnName],
                    timestampCol=timeColumnName,
                    startDate=startDate,
                    endDate=endDate,
                ):
                    minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]
                    if minute <= 15:
                        self.nextMinuteCrossingWithoutSubkeys = 15
                    elif minute <= 30:
                        self.nextMinuteCrossingWithoutSubkeys = 30
                    elif minute <= 45:
                        self.nextMinuteCrossingWithoutSubkeys = 45
                    elif minute == 0 or minute <= 59:
                        self.nextMinuteCrossingWithoutSubkeys = 0
                    else:
                        raise Exception("Unable to determine next minute crossing")
                    self.logger.log("next min crossing = {}".format(self.nextMinuteCrossingWithoutSubkeys), "debug")
                    rowCnt += 1
                    if rowCnt > 0:
                        break

        __initIntervalCrossings()

        for row in self.rawData(
            dataType=dataType,
            orderBy=[timeColumnName, subkeyColumnName],
            timestampCol=timeColumnName,
            startDate=startDate,
            endDate=endDate,
        ):

            if mySubkeys:
                for col in self.columns[dataType].split(","):
                    if self.mathUtil.isNumber(row[ci(col)]) and ci(col) != ci(subkeyColumnName):
                        sum[row[ci(subkeyColumnName)]][ci(col)] += row[ci(col)]
                        cnt[row[ci(subkeyColumnName)]][ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute, subkey=row[ci(subkeyColumnName)]):
                    minuteCrossed = minute

                    # Perform aggregation on all of the previous data including
                    # the current data for the current subkey.
                    self.logger.log("key: {}".format(row[ci(subkeyColumnName)]), "debug")
                    aggData += [
                        self.intervalAverages(
                            sum,
                            cnt,
                            row[ci(timeColumnName)],
                            ci(timeColumnName),
                            ci(subkeyColumnName),
                            row[ci(subkeyColumnName)],
                        )
                    ]
                    self.logger.log("minute crossed {}".format(minuteCrossed), "DEBUG")

                    # Init current sum and cnt for subkey that has a completed
                    # interval.
                    (sum, cnt) = __initSumAndCount(subkey=row[ci(subkeyColumnName)], sums=sum, cnts=cnt)
            else:
                for col in self.columns[dataType].split(","):
                    if self.mathUtil.isNumber(row[ci(col)]):
                        sum[ci(col)] += row[ci(col)]
                        cnt[ci(col)] += 1

                minute = row[ci(timeColumnName)].timetuple()[MINUTE_POSITION]

                if self.intervalCrossed(minute=minute):
                    aggData += [self.intervalAverages(sum, cnt, row[ci(timeColumnName)], ci(timeColumnName))]
                    (sum, cnt) = __initSumAndCount(subkey=None, sums=sum, cnts=cnt)

            rowCnt += 1

        self.logger.log("aggdata = {}".format(aggData), "debug")
        return MSGAggregatedData(
            aggregationType=aggregationType, columns=self.columns[dataType].split(","), data=aggData
        )
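

# Usage sketch (added; not part of the original module). It assumes a
# configured Maui Smart Grid database is reachable and follows the API given
# in the MSGDataAggregator docstring above:
#
#     from msg_data_aggregator import MSGDataAggregator
#     aggregator = MSGDataAggregator(exitOnError = False)
#     for dataType in ('irradiance', 'weather', 'circuit', 'egauge'):
#         print aggregator.aggregateNewData(dataType = dataType)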
    Save retrieval results stored in a global string.
    """

    global MSG_BODY
    global WEATHER_DATA_PATH
    fp = open('%s/retrieval-results.txt' % WEATHER_DATA_PATH, 'wb')
    fp.write(MSG_BODY)
    fp.close()


if __name__ == '__main__':

    dbConnector = MSGDBConnector()
    cursor = dbConnector.conn.cursor()
    weatherUtil = MSGWeatherDataUtil()
    timeUtil = MSGTimeUtil()

    msg = "Downloading NOAA weather data (%s)." % timeUtil.conciseNow()
    print msg
    MSG_BODY = '%s\n' % msg

    msg = "Last loaded date is %s." % weatherUtil.datePart(
        datetime=weatherUtil.getLastDateLoaded(cursor))
    print msg
    MSG_BODY += '%s\n' % msg

    retriever = MSGWeatherDataRetriever()
    configer = MSGConfiger()
    WEATHER_DATA_PATH = configer.configOptionValue('Weather Data',
                                                   'weather_data_path')