Example #1
def download_day_offers(year, month):
    expected_fname = "PUBLIC_DVD_BIDDAYOFFER_D_"+str(year)+str(month).zfill(2)+"010000.CSV"
    # If the file isn't in the folder, use nemosis to download it. 
    if not check_file_exists(os.path.join(raw_data_cache, 'BIDDAYOFFER_D'), expected_fname):
        print("File not found - fetching via NEMOSIS", expected_fname)
        start_time = pendulum.datetime(year,month,2,0,0).format('YYYY/MM/DD HH:mm:ss')
        end_time = pendulum.datetime(year,month,2,0,5).format('YYYY/MM/DD HH:mm:ss')
        print(start_time, end_time)
        # end_time = '2018/01/01 00:05:00'
        table = 'BIDDAYOFFER_D'
        data_fetch_methods.dynamic_data_compiler(start_time, end_time, table, os.path.join(raw_data_cache, 'BIDDAYOFFER_D'))
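This example relies on helpers defined elsewhere in its source file, notably raw_data_cache and check_file_exists. A minimal sketch of what check_file_exists might look like, assuming it simply tests whether the expected CSV is already cached, could be:

import os

def check_file_exists(directory, fname):
    # Hypothetical helper: True if the expected raw CSV is already in the cache folder.
    return os.path.isfile(os.path.join(directory, fname))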
Example #2
 def test_dispatch_tables_start_of_month_parquet_format(self):
     start_time = '2018/02/01 00:00:00'
     end_time = '2018/02/01 05:15:00'
     for table in self.table_names:
         print(
             'Testing {} returning values at start of month.'.format(table))
         dat_col = defaults.primary_date_columns[table]
         table_type = self.table_types[table]
         cols = [dat_col, self.table_types[table]]
         filter_cols = (self.table_types[table], )
         expected_length = 63
         expected_number_of_columns = 2
         expected_first_time = pd.to_datetime(
             start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
         expected_last_time = pd.to_datetime(end_time,
                                             format='%Y/%m/%d %H:%M:%S')
         if table in [
                 'TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                 'TRADINGINTERCONNECT'
         ]:
             expected_length = 10
             expected_first_time = '2018/02/01 00:30:00'
             expected_first_time = pd.to_datetime(
                 expected_first_time, format='%Y/%m/%d %H:%M:%S')
             expected_last_time = '2018/02/01 05:00:00'
             expected_last_time = pd.to_datetime(expected_last_time,
                                                 format='%Y/%m/%d %H:%M:%S')
         if table == 'BIDPEROFFER_D':
             cols = [dat_col, 'DUID', 'BIDTYPE']
             filter_cols = ('DUID', 'BIDTYPE')
             expected_number_of_columns = 3
         if table == 'BIDDAYOFFER_D':
             cols = [dat_col, 'DUID', 'BIDTYPE']
             filter_cols = ('DUID', 'BIDTYPE')
             expected_number_of_columns = 3
             expected_length = 2
             expected_last_time = '2018/02/01 00:00:00'
             expected_last_time = pd.to_datetime(expected_last_time,
                                                 format='%Y/%m/%d %H:%M:%S')
             expected_first_time = '2018/01/31 00:00:00'
             expected_first_time = pd.to_datetime(
                 expected_first_time, format='%Y/%m/%d %H:%M:%S')
         data = data_fetch_methods.dynamic_data_compiler(
             start_time,
             end_time,
             table,
             defaults.raw_data_cache,
             select_columns=cols,
             filter_cols=filter_cols,
             filter_values=self.filter_values[table_type],
             fformat='parquet',
             parse_data_types=True)
         data = data.reset_index(drop=True)
         print(table)
         self.assertEqual(expected_length, data.shape[0])
         self.assertEqual(expected_number_of_columns, data.shape[1])
         self.assertEqual(expected_first_time, data[dat_col][0])
         self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
         self.assertFalse(all(object == data.dtypes))
         print('Passed')
Example #3
def get_regional_demand(start_time, end_time, raw_data_cache):

    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHREGIONSUM',
        raw_data_cache,
        select_columns=[
            'SETTLEMENTDATE', 'INTERVENTION', 'REGIONID', 'TOTALDEMAND'
        ])

    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]

    dispatch_data['TOTALDEMAND'] = pd.to_numeric(dispatch_data['TOTALDEMAND'])

    dispatch_data = dispatch_data.pivot_table(values='TOTALDEMAND',
                                              index='SETTLEMENTDATE',
                                              columns='REGIONID')

    dispatch_data = dispatch_data.reset_index().fillna('0.0')

    dispatch_data = dispatch_data.rename(columns={
        'QLD1': 'qld',
        'NSW1': 'nsw',
        'VIC1': 'vic',
        'SA1': 'sa',
        'TAS1': 'tas'
    })

    dispatch_data.columns = [
        col + '-demand' if col != 'SETTLEMENTDATE' else col
        for col in dispatch_data.columns
    ]

    return dispatch_data
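A hedged usage sketch for get_regional_demand; the date range and cache path below are placeholders rather than values from the original source:

# Assumed usage: pivoted five-minute regional demand for one day.
raw_data_cache = 'data/aemo_raw_cache/'
demand = get_regional_demand('2018/01/01 00:00:00',
                             '2018/01/02 00:00:00',
                             raw_data_cache)
print(demand.head())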
Example #4
def get_tech_operating_capacities(start_time, end_time, raw_data_cache):
    tech_data = get_duid_techs(raw_data_cache)

    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHLOAD',
        raw_data_cache,
        select_columns=[
            'DUID', 'SETTLEMENTDATE', 'INTERVENTION', 'AVAILABILITY'
        ])

    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]

    dispatch_data = pd.merge(dispatch_data, tech_data, on='DUID')

    dispatch_data['AVAILABILITY'] = pd.to_numeric(
        dispatch_data['AVAILABILITY'])

    dispatch_data = dispatch_data.groupby(['TECH', 'SETTLEMENTDATE'],
                                          as_index=False).aggregate(
                                              {'AVAILABILITY': 'sum'})

    dispatch_data['tech_region'] = dispatch_data['TECH'] + '-capacity'

    dispatch_data = dispatch_data.pivot_table(values='AVAILABILITY',
                                              index='SETTLEMENTDATE',
                                              columns='tech_region')

    dispatch_data = dispatch_data.reset_index().fillna('0.0')

    return dispatch_data
Example #5
    def test_fcas_tables_end_of_year(self):
        table = 'FCAS_4_SECOND'
        minute_offset = 5
        start_time = self.start_year - timedelta(minutes=minute_offset)
        end_time = start_time + timedelta(minutes=minute_offset * 2)
        start_str = start_time.strftime("%Y/%m/%d %H:%M:%S")
        end_str = end_time.strftime("%Y/%m/%d %H:%M:%S")

        print('Testing {} returning values at end of year.'.format(table))
        dat_col = defaults.primary_date_columns[table]
        cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
        expected_length = 15 * (minute_offset * 2)
        length_check = False
        expected_number_of_columns = 3
        data = data_fetch_methods.dynamic_data_compiler(
            start_str,
            end_str,
            table,
            defaults.raw_data_cache,
            select_columns=cols,
            fformat="feather",
            keep_csv=False)
        length_array = data[dat_col].drop_duplicates()
        if length_array.shape[0] == expected_length:
            length_check = True
        # case if first data point is 00:00:01/02/03
        elif length_array.shape[0] == expected_length - 1:
            length_check = True
        self.assertTrue(length_check)
        self.assertEqual(expected_number_of_columns, data.shape[1])
        print('Passed')
Example #6
 def test_caching_and_typing_works_parquet(self):
     start_time = '2018/02/20 23:00:00'
     end_time = '2018/02/20 23:30:00'
     for table in self.table_names:
         dat_col = defaults.primary_date_columns[table]
         id_col = self.id_cols[table]
         print('Testing {} returning values for 1 interval.'.format(table))
         data_fetch_methods.cache_compiler(start_time,
                                           end_time,
                                           table,
                                           defaults.raw_data_cache,
                                           fformat="parquet")
         data = data_fetch_methods.dynamic_data_compiler(
             start_time,
             end_time,
             table,
             defaults.raw_data_cache,
             fformat="parquet")
         dat_col_type = data[dat_col].dtype
         id_col_type = data[id_col].dtype
         not_empty = data.shape[0] > 0
         not_typed = all(data.dtypes == "object")
         self.assertTrue(not_empty)
         self.assertFalse(not_typed)
         self.assertEqual(dat_col_type, "<M8[ns]")
         self.assertEqual(id_col_type, "object")
         print('Passed')
Example #7
def update_nemosis_cache():
    start_time = "2012/01/01 00:00:00"
    end_time = "{}/01/01 00:00:00".format(datetime.datetime.now().year + 1)
    table = "TRADINGPRICE"
    raw_data_cache = 'data/aemo_raw_cache/'
    price_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, raw_data_cache)
Example #8
    def test_dispatch_tables_start_of_month_just_csv_dont_merge(self):
        # Empty cache.
        for f in os.listdir(defaults.raw_data_cache):
            os.remove(os.path.join(defaults.raw_data_cache, f))

        start_time = '2018/02/01 00:00:00'
        end_time = '2018/02/01 05:15:00'
        for table in self.table_names:
            print(
                'Testing {} returning values at start of month.'.format(table))
            dat_col = defaults.primary_date_columns[table]
            table_type = self.table_types[table]
            cols = [dat_col, self.table_types[table]]
            filter_cols = (self.table_types[table], )
            expected_length = 63
            expected_number_of_columns = 2
            expected_first_time = pd.to_datetime(
                start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
            expected_last_time = pd.to_datetime(end_time,
                                                format='%Y/%m/%d %H:%M:%S')
            if table in [
                    'TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                    'TRADINGINTERCONNECT'
            ]:
                expected_length = 10
                expected_first_time = '2018/02/01 00:30:00'
                expected_first_time = pd.to_datetime(
                    expected_first_time, format='%Y/%m/%d %H:%M:%S')
                expected_last_time = '2018/02/01 05:00:00'
                expected_last_time = pd.to_datetime(expected_last_time,
                                                    format='%Y/%m/%d %H:%M:%S')
            if table == 'BIDPEROFFER_D':
                cols = [dat_col, 'DUID', 'BIDTYPE']
                filter_cols = ('DUID', 'BIDTYPE')
                expected_number_of_columns = 3
            if table == 'BIDDAYOFFER_D':
                cols = [dat_col, 'DUID', 'BIDTYPE']
                filter_cols = ('DUID', 'BIDTYPE')
                expected_number_of_columns = 3
                expected_length = 2
                expected_last_time = '2018/02/01 00:00:00'
                expected_last_time = pd.to_datetime(expected_last_time,
                                                    format='%Y/%m/%d %H:%M:%S')
                expected_first_time = '2018/01/31 00:00:00'
                expected_first_time = pd.to_datetime(
                    expected_first_time, format='%Y/%m/%d %H:%M:%S')
            data = data_fetch_methods.dynamic_data_compiler(
                start_time,
                end_time,
                table,
                defaults.raw_data_cache,
                select_columns=cols,
                filter_cols=filter_cols,
                filter_values=self.filter_values[table_type],
                fformat='csv',
                data_merge=False)
            self.assertIsNone(data)
        self.assertEqual(len(os.listdir(defaults.raw_data_cache)), 2)
Example #9
 def test_dispatch_tables_start_of_month(self):
     start_time = '2018/02/01 00:00:00'
     end_time = '2018/02/01 05:15:00'
     for table in self.table_names:
         print(f'Testing {table} returning values at start of month.')
         dat_col = defaults.primary_date_columns[table]
         table_type = self.table_types[table]
         filter_cols = self.table_filters[table]
         cols = [dat_col, *filter_cols]
         expected_length = 63
         expected_number_of_columns = 2
         expected_first_time = \
             (pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') +
              timedelta(minutes=5))
         expected_last_time = pd.to_datetime(end_time,
                                             format='%Y/%m/%d %H:%M:%S')
         if table in [
                 'TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                 'TRADINGINTERCONNECT'
         ]:
             expected_length = 10
             expected_first_time = '2018/02/01 00:30:00'
             expected_first_time =\
                 pd.to_datetime(expected_first_time,
                                format='%Y/%m/%d %H:%M:%S')
             expected_last_time = '2018/02/01 05:00:00'
             expected_last_time =\
                 pd.to_datetime(expected_last_time,
                                format='%Y/%m/%d %H:%M:%S')
         if "ONLY" not in table_type:
             expected_number_of_columns = 3
         if table == 'BIDDAYOFFER_D':
             expected_length = 2
             expected_last_time = '2018/02/01 00:00:00'
             expected_last_time = \
                 pd.to_datetime(expected_last_time,
                                format='%Y/%m/%d %H:%M:%S')
             expected_first_time = '2018/01/31 00:00:00'
             expected_first_time =\
                 pd.to_datetime(expected_first_time,
                                format='%Y/%m/%d %H:%M:%S')
         data = data_fetch_methods.dynamic_data_compiler(
             start_time,
             end_time,
             table,
             defaults.raw_data_cache,
             select_columns=cols,
             fformat="feather",
             keep_csv=False,
             filter_cols=filter_cols,
             filter_values=self.filter_values[table_type])
         data = data.reset_index(drop=True)
         self.assertEqual(expected_length, data.shape[0])
         self.assertEqual(expected_number_of_columns, data.shape[1])
         self.assertEqual(expected_first_time, data[dat_col][0])
         self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
         print('Passed')
Example #10
def get_residual_demand(start_time, end_time, raw_data_cache):
    cols = ['DUID', 'Region', 'Fuel Source - Descriptor']
    tech_data = data_fetch_methods.static_table_xl(
        'Generators and Scheduled Loads', raw_data_cache, select_columns=cols)
    zero_srmc_techs = ['Wind', 'Solar', 'Solar ']
    tech_data = tech_data[tech_data['Fuel Source - Descriptor'].isin(
        zero_srmc_techs)]
    scada_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCH_UNIT_SCADA', raw_data_cache)
    scada_data = pd.merge(scada_data, tech_data, on='DUID')
    scada_data['SCADAVALUE'] = pd.to_numeric(scada_data['SCADAVALUE'])
    scada_data = scada_data.groupby(['SETTLEMENTDATE', 'Region'],
                                    as_index=False).agg({'SCADAVALUE': 'sum'})
    regional_demand = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHREGIONSUM', raw_data_cache)
    regional_demand = regional_demand[regional_demand['INTERVENTION'] == 0]
    regional_demand = pd.merge(regional_demand,
                               scada_data,
                               left_on=['SETTLEMENTDATE', 'REGIONID'],
                               right_on=['SETTLEMENTDATE', 'Region'])
    regional_demand['TOTALDEMAND'] = pd.to_numeric(
        regional_demand['TOTALDEMAND'])
    regional_demand['RESIDUALDEMAND'] = regional_demand[
        'TOTALDEMAND'] - regional_demand['SCADAVALUE']

    regional_demand = regional_demand.pivot_table(values='RESIDUALDEMAND',
                                                  index='SETTLEMENTDATE',
                                                  columns='REGIONID')

    regional_demand = regional_demand.reset_index().fillna('0.0')

    regional_demand = regional_demand.rename(columns={
        'QLD1': 'qld',
        'NSW1': 'nsw',
        'VIC1': 'vic',
        'SA1': 'sa',
        'TAS1': 'tas'
    })

    regional_demand.columns = [
        col + '-demand' if col != 'SETTLEMENTDATE' else col
        for col in regional_demand.columns
    ]
    return regional_demand
Example #11
def get_unit_dispatch(start_time, end_time, unit, raw_data_cache):
    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHLOAD',
        raw_data_cache,
        select_columns=['DUID', 'SETTLEMENTDATE', 'INTERVENTION', 'INITIALMW'])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    dispatch_data = dispatch_data[dispatch_data['DUID'] == unit]
    initial_mw = dispatch_data['INITIALMW'].iloc[0]
    return float(initial_mw)
Example #12
def get_wholesale_prices(year, region):
    start_time = "{}/01/01 00:00:00".format(year)
    end_time = "{}/01/01 00:00:00".format(int(year) + 1)
    table = "TRADINGPRICE"
    raw_data_cache = 'data/aemo_raw_cache/'
    price_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, raw_data_cache)
    price_data = price_data[price_data['REGIONID'] == (region + '1')]
    price_data = price_data.loc[:, ['SETTLEMENTDATE', 'RRP']]
    price_data['RRP'] = pd.to_numeric(price_data['RRP'])
    price_data['SETTLEMENTDATE'] = pd.to_datetime(price_data['SETTLEMENTDATE'])
    return price_data
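A possible call, assuming the region is passed without the trailing '1' (the function appends it to form the AEMO REGIONID):

# Assumed usage: 2018 trading prices for New South Wales ('NSW' + '1' == 'NSW1').
nsw_prices = get_wholesale_prices(2018, 'NSW')
print(nsw_prices.head())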
Example #13
 def test_filtering_for_one_interval_returns(self):
     start_time = '2017/05/20 23:00:00'
     end_time = '2017/05/20 23:05:00'
     for table in self.table_names:
         print('Testing {} returning values for 1 interval.'.format(table))
         data = data_fetch_methods.dynamic_data_compiler(
                 start_time, end_time, table, defaults.raw_data_cache,
                 select_columns=defaults.table_primary_keys[table] + ['END_DATE'])
         group_cols = [col for col in defaults.table_primary_keys[table] if col != 'START_DATE']
         contains_duplicates = data.duplicated(group_cols).any()
         self.assertEqual(False, contains_duplicates)
         not_empty = data.shape[0] > 0
         self.assertEqual(True, not_empty)
         print('Passed')
Example #14
def get_fleet_dispatch(start_time, end_time, fleet_units, region,
                       raw_data_cache):

    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHLOAD',
        raw_data_cache,
        select_columns=[
            'DUID', 'SETTLEMENTDATE', 'TOTALCLEARED', 'INTERVENTION',
            'RAISEREG', 'RAISE6SEC', 'RAISE60SEC', 'RAISE5MIN'
        ])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]

    dispatch_data = dispatch_data[dispatch_data['DUID'].isin(fleet_units)]

    dispatch_data['TOTALCLEARED'] = pd.to_numeric(
        dispatch_data['TOTALCLEARED'])
    dispatch_data['RAISEREG'] = pd.to_numeric(dispatch_data['RAISEREG'])
    dispatch_data['RAISE6SEC'] = pd.to_numeric(dispatch_data['RAISE6SEC'])
    dispatch_data['RAISE60SEC'] = pd.to_numeric(dispatch_data['RAISE60SEC'])
    dispatch_data['RAISE5MIN'] = pd.to_numeric(dispatch_data['RAISE5MIN'])

    dispatch_data = dispatch_data.groupby('SETTLEMENTDATE',
                                          as_index=False).aggregate({
                                              'TOTALCLEARED':
                                              'sum',
                                              'RAISEREG':
                                              'sum',
                                              'RAISE6SEC':
                                              'sum',
                                              'RAISE60SEC':
                                              'sum',
                                              'RAISE5MIN':
                                              'sum'
                                          })

    aemo_dispatch_names = {
        'TOTALCLEARED': region + '-energy-fleet-dispatch',
        'RAISEREG': region + '-raise_regulation-fleet-dispatch',
        'RAISE6SEC': region + '-raise_6_second-fleet-dispatch',
        'RAISE60SEC': region + '-raise_60_second-fleet-dispatch',
        'RAISE5MIN': region + '-raise_5_minute-fleet-dispatch'
    }

    dispatch_data = dispatch_data.rename(columns=aemo_dispatch_names)

    return dispatch_data
Example #15
 def test_dispatch_tables_end_of_month(self):
     start_time = '2013/07/31 21:00:00'
     end_time = '2013/08/01 00:00:00'
     for table in self.table_names:
         print('Testing {} returning values at end of month.'.format(table))
         dat_col = defaults.primary_date_columns[table]
         table_type = self.table_types[table]
         cols = [dat_col, self.table_types[table]]
         filter_cols = (self.table_types[table],)
         expected_length = 36
         expected_number_of_columns = 2
         expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
         expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S')
         if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM', 'TRADINGINTERCONNECT']:
             expected_length = 6
             expected_first_time = '2013/07/31 21:30:00'
             expected_first_time = pd.to_datetime(expected_first_time, format='%Y/%m/%d %H:%M:%S')
         if table == 'BIDPEROFFER_D':
             cols = [dat_col, 'DUID', 'BIDTYPE']
             filter_cols = ('DUID', 'BIDTYPE')
             expected_number_of_columns = 3
         if table == 'BIDDAYOFFER_D':
             cols = [dat_col, 'DUID', 'BIDTYPE']
             filter_cols = ('DUID', 'BIDTYPE')
             expected_number_of_columns = 3
             expected_length = 1
             expected_last_time = expected_first_time.replace(hour=0, minute=0)
             expected_first_time = expected_first_time.replace(hour=0, minute=0)
         data = data_fetch_methods.dynamic_data_compiler(
                 start_time, end_time, table, defaults.raw_data_cache,
                 select_columns=cols,
                 filter_cols=filter_cols, filter_values=self.filter_values[table_type])
         data = data.sort_values(dat_col)
         data = data.reset_index(drop=True)
         self.assertEqual(expected_length, data.shape[0])
         self.assertEqual(expected_number_of_columns, data.shape[1])
         self.assertEqual(expected_first_time, data[dat_col][0])
         self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
         print('Passed')
Example #16
def get_regional_prices(start_time, end_time, raw_data_cache):

    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHPRICE',
        raw_data_cache,
        select_columns=[
            'SETTLEMENTDATE', 'INTERVENTION', 'REGIONID', 'RRP', 'RAISEREGRRP',
            'RAISE6SECRRP', 'RAISE60SECRRP', 'RAISE5MINRRP'
        ])

    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    data = pd.DataFrame()
    for name, aemo_name in aemo_price_names.items():
        dispatch_data[aemo_name] = pd.to_numeric(dispatch_data[aemo_name])
        data_temp = dispatch_data.pivot_table(values=aemo_name,
                                              index='SETTLEMENTDATE',
                                              columns='REGIONID')
        data_temp = data_temp.reset_index().fillna('0.0')
        data_temp = data_temp.rename(
            columns={
                'QLD1': 'qld',
                'NSW1': 'nsw',
                'VIC1': 'vic',
                'SA1': 'sa',
                'TAS1': 'tas'
            })
        data_temp.columns = [
            col + '-' + name if col != 'SETTLEMENTDATE' else col
            for col in data_temp.columns
        ]
        if data.empty:
            data = data_temp
        else:
            data = pd.merge(data, data_temp, on=['SETTLEMENTDATE'])

    return data
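get_regional_prices references a module-level aemo_price_names dict that is not shown here. Judging from the columns it selects and the naming style used in Example #14, it presumably maps output price names to AEMO DISPATCHPRICE columns; a hypothetical reconstruction:

# Assumed mapping of output price names to AEMO DISPATCHPRICE columns (not from the original source).
aemo_price_names = {
    'energy': 'RRP',
    'raise_regulation': 'RAISEREGRRP',
    'raise_6_second': 'RAISE6SECRRP',
    'raise_60_second': 'RAISE60SECRRP',
    'raise_5_minute': 'RAISE5MINRRP'
}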
Example #17
 def test_dispatch_tables_stradle_years(self):
     table = 'FCAS_4_SECOND'
     start_time = '2011/12/31 23:55:04'
     end_time = '2012/01/01 00:05:00'
     print('Testing {} returning values from adjacent years.'.format(table))
     dat_col = defaults.primary_date_columns[table]
     cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
     filter_cols = ('ELEMENTNUMBER', 'VARIABLENUMBER')
     expected_length = 149
     expected_number_of_columns = 3
     expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(seconds=4)
     expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S')
     data = data_fetch_methods.dynamic_data_compiler(
         start_time, end_time, table, defaults.raw_data_cache,
         select_columns=cols,
         filter_cols=filter_cols, filter_values=(['1'], ['3']))
     data = data.sort_values(dat_col)
     data = data.reset_index(drop=True)
     self.assertEqual(expected_length, data.shape[0])
     self.assertEqual(expected_number_of_columns, data.shape[1])
     self.assertEqual(expected_first_time, data[dat_col][0])
     self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
     print('Passed')
Example #18
 def test_fcas_tables_end_of_month(self):
     table = 'FCAS_4_SECOND'
     start_time = '2013/07/31 23:55:03'
     end_time = '2013/08/01 00:00:04'
     print('Testing {} returning values at end of month.'.format(table))
     dat_col = defaults.primary_date_columns[table]
     cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
     filter_cols = ('ELEMENTNUMBER', 'VARIABLENUMBER')
     expected_length = 75
     expected_number_of_columns = 3
     expected_first_time = pd.to_datetime(start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(seconds=4)
     expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S')
     data = data_fetch_methods.dynamic_data_compiler(
         start_time, end_time, table, defaults.raw_data_cache,
         select_columns=cols,
         filter_cols=filter_cols, filter_values=(['1'], ['3']))
     data = data.sort_values(dat_col)
     data = data.reset_index(drop=True)
     self.assertEqual(expected_length, data.shape[0])
     self.assertEqual(expected_number_of_columns, data.shape[1])
     self.assertEqual(expected_first_time, data[dat_col][0])
     self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
     print('Passed')
Example #19
    def trading_price(self):
        table = 'TRADINGPRICE'
        raw_data_cache = './cache'

        trading_price = data_fetch_methods.dynamic_data_compiler(
            self._start_date.strftime("%Y/%m/%d 00:00:00"),
            self._end_date.strftime("%Y/%m/%d 00:00:00"), table,
            raw_data_cache)

        trading_price = trading_price.loc[trading_price['REGIONID'] ==
                                          self._state]
        trading_price['SETTLEMENTDATE'] = pd.to_datetime(
            trading_price['SETTLEMENTDATE'], format='%d/%m/%Y %H:%M')
        trading_price = trading_price.sort_values(by=['SETTLEMENTDATE'])
        trading_price = trading_price.reset_index(drop=True)
        trading_price['tstep_len'] = [30] * len(trading_price.index)

        columns = [
            'RRP', 'RAISE6SECRRP', 'RAISE60SECRRP', 'RAISE5MINRRP',
            'RAISEREGRRP', 'LOWER6SECRRP', 'LOWER60SECRRP', 'LOWERREGRRP'
        ]
        trading_price[columns] = trading_price[columns].astype(float)

        return trading_price
Example #20
def plant_stats(start_time,
                end_time,
                table_name,
                raw_data_location,
                select_columns=None,
                filter_cols=None,
                filter_values=None):

    ix = pd.date_range(
        start=datetime.strptime(start_time, '%Y/%m/%d %H:%M:%S'),
        end=datetime.strptime(end_time, '%Y/%m/%d %H:%M:%S') -
        timedelta(minutes=5),
        freq='5T')
    timeseries_df = pd.DataFrame(index=ix)
    timeseries_df.reset_index(inplace=True)
    timeseries_df.columns = ['SETTLEMENTDATE']

    gen_max_cap = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DUDETAIL',
        raw_data_location,
        select_columns=['EFFECTIVEDATE', 'DUID', 'VERSIONNO', 'MAXCAPACITY'],
        filter_cols=filter_cols,
        filter_values=filter_values)
    gen_max_cap = select_highest_version_number(
        gen_max_cap, defaults.table_primary_keys['DUDETAIL'])
    gen_region = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DUDETAILSUMMARY',
        raw_data_location,
        select_columns=['START_DATE', 'END_DATE', 'DUID', 'REGIONID'],
        filter_cols=filter_cols,
        filter_values=filter_values)
    scada = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCH_UNIT_SCADA',
        raw_data_location,
        select_columns=['SETTLEMENTDATE', 'DUID', 'SCADAVALUE'])
    dispatch_price = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHPRICE',
        raw_data_location,
        select_columns=['SETTLEMENTDATE', 'REGIONID', 'RRP', 'INTERVENTION'])
    dispatch_price = select_intervention_if_present(
        dispatch_price, defaults.table_primary_keys['DISPATCHPRICE'])
    trading_price = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'TRADINGPRICE',
        raw_data_location,
        select_columns=['SETTLEMENTDATE', 'REGIONID', 'RRP'])
    dispatch_price['RRP'] = pd.to_numeric(dispatch_price['RRP'])
    trading_price['RRP'] = pd.to_numeric(trading_price['RRP'])
    #trading_price = calc_trading_price(dispatch_price)

    region_summary = data_fetch_methods.dynamic_data_compiler(
        start_time,
        end_time,
        'DISPATCHREGIONSUM',
        raw_data_location,
        select_columns=[
            'SETTLEMENTDATE', 'REGIONID', 'TOTALDEMAND', 'INTERVENTION',
            'DISPATCHINTERVAL'
        ])
    region_summary = select_intervention_if_present(
        region_summary, defaults.table_primary_keys['DISPATCHREGIONSUM'])

    scada_list = []
    for gen in scada.groupby(['DUID'], as_index=False):
        temp = pd.merge(timeseries_df, gen[1], 'left', on='SETTLEMENTDATE')
        temp['SCADAVALUE'] = np.where(temp['SCADAVALUE'].isnull(), 0.0,
                                      temp['SCADAVALUE'])
        temp['DUID'] = np.where(temp['DUID'].isnull(), gen[0], temp['DUID'])
        scada_list.append(temp)

    scada = pd.concat(scada_list)

    trading_load = calc_trading_load(scada)
    combined_data = merge_tables_for_plant_stats(timeseries_df, gen_max_cap,
                                                 gen_region, scada,
                                                 trading_load, dispatch_price,
                                                 trading_price, region_summary)
    combined_data['SCADAVALUE'] = pd.to_numeric(combined_data['SCADAVALUE'])
    combined_data['MAXCAPACITY'] = pd.to_numeric(combined_data['MAXCAPACITY'])
    combined_data['TRADING_TOTALCLEARED'] = pd.to_numeric(
        combined_data['TRADING_TOTALCLEARED'])

    combined_data['TOTALDEMAND'] = pd.to_numeric(combined_data['TOTALDEMAND'])

    stats = stats_by_month_and_plant(combined_data)
    return stats
Example #21
def trading_and_dispatch_cost():
    gen_region = data_fetch_methods.dynamic_data_compiler(
        '2017/01/01 00:05:00',
        '2018/01/01 00:05:00',
        'DUDETAILSUMMARY',
        'E:/raw_aemo_data',
        select_columns=['START_DATE', 'END_DATE', 'DUID', 'REGIONID'])
    scada = data_fetch_methods.dynamic_data_compiler('2017/01/01 00:05:00',
                                                     '2018/01/01 00:05:00',
                                                     'DISPATCH_UNIT_SCADA',
                                                     'E:/raw_aemo_data')

    ix = pd.date_range(start=datetime.strptime('2017/01/01 00:00:00',
                                                '%Y/%m/%d %H:%M:%S'),
                       end=datetime.strptime('2018/01/01 00:00:00',
                                             '%Y/%m/%d %H:%M:%S'),
                       freq='5T')
    timeseries_df = pd.DataFrame(index=ix)
    timeseries_df.reset_index(inplace=True)
    timeseries_df.columns = ['SETTLEMENTDATE']

    scada_list = []
    for gen in scada.groupby(['DUID'], as_index=False):
        temp = pd.merge(timeseries_df, gen[1], 'left', on='SETTLEMENTDATE')
        temp['SCADAVALUE'] = np.where(temp['SCADAVALUE'].isnull(), 0.0,
                                      temp['SCADAVALUE'])
        temp['DUID'] = np.where(temp['DUID'].isnull(), gen[0], temp['DUID'])
        scada_list.append(temp)

    scada = pd.concat(scada_list)

    dispatch_price = data_fetch_methods.dynamic_data_compiler(
        '2017/01/01 00:00:00', '2018/01/01 00:05:00', 'DISPATCHPRICE',
        'E:/raw_aemo_data')
    dispatch_price = select_intervention_if_present(
        dispatch_price, defaults.table_primary_keys['DISPATCHPRICE'])
    gen_region = gen_region.sort_values('START_DATE')
    scada = scada.sort_values('SETTLEMENTDATE')
    scada_and_regions = pd.merge_asof(scada,
                                      gen_region,
                                      left_on='SETTLEMENTDATE',
                                      right_on='START_DATE',
                                      by='DUID')
    scada_and_regions = scada_and_regions[~scada_and_regions['REGIONID'].
                                          isnull()]
    scada_and_regions = pd.merge(scada_and_regions,
                                 dispatch_price,
                                 'inner',
                                 left_on=['REGIONID', 'SETTLEMENTDATE'],
                                 right_on=['REGIONID', 'SETTLEMENTDATE'])

    scada_and_regions['SCADAVALUE'] = pd.to_numeric(
        scada_and_regions['SCADAVALUE'])
    scada_and_regions['RRP'] = pd.to_numeric(scada_and_regions['RRP'])

    scada_and_regions['DISPATCHCOST'] = (scada_and_regions['SCADAVALUE'] *
                                         scada_and_regions['RRP']) / 12

    scada_and_regions = scada_and_regions.set_index('SETTLEMENTDATE')
    scada_and_regions = scada_and_regions.groupby('DUID').resample(
        '30T', label='right', closed='right').aggregate({
            'SCADAVALUE': 'mean',
            'RRP': 'mean',
            'DISPATCHCOST': 'sum'
        })
    scada_and_regions.reset_index(inplace=True)

    scada_and_regions['TRADINGCOST'] = (scada_and_regions['SCADAVALUE'] *
                                        scada_and_regions['RRP']) / 2

    scada_and_regions['SCADAVALUE'] = scada_and_regions['SCADAVALUE'] / 2

    scada_and_regions = scada_and_regions.groupby('DUID').sum()

    scada_and_regions.to_csv(
        'C:/Users/user/Documents/dispatch_trading_cost.csv')
Example #22
start_time_array = timeData[0:-1]
end_time_array = timeData[1:len(timeData)]

# For every day this creates a clean csv file of the Gen and Load of HPR
for x in range(0, len(start_time_array)):
    print(x)
    start_time = start_time_array[x].strftime("%Y/%m/%d %H:%M:%S")
    print(start_time)
    end_time = end_time_array[x].strftime("%Y/%m/%d %H:%M:%S")
    print(end_time)
    table = 'FCAS_4_SECOND'
    raw_data_cache = 'C:/...'  # The folder where the raw data is stored

    gen_filename = 'C:/.../HRP_GEN_TEST##.csv'  # The folder where the cleaned data will go
    load_filename = 'C:/.../HRP_LOAD_TEST##.csv'  # The folder where the cleaned data will go
    gen_filename = gen_filename.replace('##',
                                        start_time_array[x].strftime("%Y%m%d"))
    load_filename = load_filename.replace(
        '##', start_time_array[x].strftime("%Y%m%d"))

    price_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, raw_data_cache)

    price_data_HPRGen = price_data[price_data.ELEMENTNUMBER.isin(['330'])
                                   & price_data.VARIABLENUMBER.isin(['2'])]
    price_data_HPRLoad = price_data[price_data.ELEMENTNUMBER.isin(['331'])
                                    & price_data.VARIABLENUMBER.isin(['2'])]

    price_data_HPRGen.sort_values(by=['TIMESTAMP']).to_csv(gen_filename)
    price_data_HPRLoad.sort_values(by=['TIMESTAMP']).to_csv(load_filename)
Example #23
def fcas4s_scada_match(start_time,
                       end_time,
                       table_name,
                       raw_data_location,
                       select_columns=None,
                       filter_cols=None,
                       filter_values=None):

    # Pull in the 4 second fcas data.
    table_name_fcas4s = 'FCAS_4_SECOND'
    fcas4s = data_fetch_methods.dynamic_data_compiler(start_time, end_time,
                                                      table_name_fcas4s,
                                                      raw_data_location)
    # Pull in the 4 second fcas variable types.
    table_name_variable_types = 'VARIABLES_FCAS_4_SECOND'
    fcas4s_variable_types = data_fetch_methods.static_table(
        start_time, end_time, table_name_variable_types, raw_data_location)

    # Select the variable types that measure MW on an interconnector and Gen_MW from a dispatch unit.
    fcas4s_variable_types = fcas4s_variable_types[
        fcas4s_variable_types['VARIABLETYPE'].isin(['MW', 'Gen_MW'])]
    fcas4s = fcas4s[fcas4s['VARIABLENUMBER'].isin(
        fcas4s_variable_types['VARIABLENUMBER'])]

    # Select just the fcas 4 second data variable columns that we need.
    fcas4s = fcas4s.loc[:, ('TIMESTAMP', 'ELEMENTNUMBER', 'VALUE')]

    # Convert the fcas MW measured values to numeric type.
    fcas4s['VALUE'] = pd.to_numeric(fcas4s['VALUE'])

    # Re-timestamp the 4 second measurements to the start of the 5 min interval, i.e. round down to the
    # nearest 5 min interval.
    fcas4s = fcas4s[(fcas4s['TIMESTAMP'].dt.minute.isin(list(range(0, 60, 5))))
                    & (fcas4s['TIMESTAMP'].dt.second < 20)]
    fcas4s['TIMESTAMP'] = fcas4s['TIMESTAMP'].apply(
        lambda dt: datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute))

    # Pull in the dispatch unit scada data.
    table_name_scada = 'DISPATCH_UNIT_SCADA'
    scada = data_fetch_methods.dynamic_data_compiler(start_time, end_time,
                                                     table_name_scada,
                                                     raw_data_location)
    scada['SETTLEMENTDATE'] = scada['SETTLEMENTDATE'] - timedelta(minutes=5)
    scada = scada.loc[:, ('SETTLEMENTDATE', 'DUID', 'SCADAVALUE')]
    scada.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']
    scada['SCADAVALUE'] = pd.to_numeric(scada['SCADAVALUE'])

    # Pull in the interconnector scada data and use the intervention records where they exist.
    table_name_inter_flow = 'DISPATCHINTERCONNECTORRES'
    inter_flows = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table_name_inter_flow, raw_data_location)
    inter_flows['METEREDMWFLOW'] = pd.to_numeric(inter_flows['METEREDMWFLOW'])
    inter_flows = inter_flows.sort_values('INTERVENTION')
    inter_flows = inter_flows.groupby(['SETTLEMENTDATE', 'INTERCONNECTORID'],
                                      as_index=False).last()
    inter_flows = inter_flows.loc[:, ('SETTLEMENTDATE', 'INTERCONNECTORID',
                                      'METEREDMWFLOW')]
    inter_flows['SETTLEMENTDATE'] = inter_flows['SETTLEMENTDATE'] - timedelta(
        minutes=5)
    inter_flows.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']

    # Combine scada data from interconnectors and dispatch units.
    scada_elements = pd.concat([scada, inter_flows], sort=False)

    # Merge the fcas and scada data based on time stamp; this joins every scada element to every fcas
    # element so that they can be compared.
    profile_comp = pd.merge(fcas4s,
                            scada_elements,
                            'inner',
                            left_on='TIMESTAMP',
                            right_on='SETTLEMENTDATE')

    # Calculate the error between each measurement.
    profile_comp['ERROR'] = profile_comp['VALUE'] - profile_comp['SCADAVALUE']
    profile_comp['ERROR'] = profile_comp['ERROR'].abs()

    # Choose the fcas values that best matches the scada value during the 5 min interval.
    profile_comp = profile_comp.sort_values('ERROR')
    error_comp = profile_comp.groupby(
        ['MARKETNAME', 'ELEMENTNUMBER', 'TIMESTAMP'], as_index=False).first()

    # Aggregate the error to compare each potential scada and fcas element match.
    error_comp = error_comp.groupby(['MARKETNAME', 'ELEMENTNUMBER'],
                                    as_index=False).sum()

    # Sort the comparisons based on aggregate error.
    error_comp = error_comp.sort_values('ERROR')

    # Drop duplicates of element numbers and scada element names, keeping the record for each with the least error.
    best_matches_scada = error_comp[
        error_comp['SCADAVALUE'].abs() >
        0]  # Don't include units with 0 values for scada
    best_matches_scada = best_matches_scada.drop_duplicates('ELEMENTNUMBER',
                                                            keep='first')
    best_matches_scada = best_matches_scada.drop_duplicates('MARKETNAME',
                                                            keep='first')

    # Remove fcas elements where a match only occurred because both fcas and scada showed no dispatch.
    best_matches_scada['ELEMENTNUMBER'] = pd.to_numeric(
        best_matches_scada['ELEMENTNUMBER'])
    best_matches_scada = best_matches_scada.sort_values('ELEMENTNUMBER')
    best_matches_scada['ELEMENTNUMBER'] = best_matches_scada[
        'ELEMENTNUMBER'].astype(str)

    # Give error as a percentage.
    best_matches_scada['ERROR'] = best_matches_scada[
        'ERROR'] / best_matches_scada['SCADAVALUE']

    # drop matches with error greater than 100 %
    best_matches_scada = best_matches_scada[(best_matches_scada['ERROR'] < 1) &
                                            (best_matches_scada['ERROR'] > -1)]

    best_matches_scada = best_matches_scada.loc[:, ('ELEMENTNUMBER',
                                                    'MARKETNAME', 'ERROR')]

    if select_columns is not None:
        best_matches_scada = best_matches_scada.loc[:, select_columns]

    if filter_cols is not None:
        best_matches_scada = filters.filter_on_column_value(
            best_matches_scada, filter_cols, filter_values)

    return best_matches_scada
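A hedged usage sketch for fcas4s_scada_match; the dates and cache path are placeholders:

# Assumed usage: match FCAS 4-second element numbers to SCADA/interconnector names over one day.
matches = fcas4s_scada_match('2018/01/01 00:00:00', '2018/01/02 00:00:00',
                             'FCAS_4_SECOND', 'data/aemo_raw_cache/')
print(matches.head())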