def get_hh_traces_stress_test(run_id, region):
    mapping_info = read_pickle_from_s3(project_bucket,
                                       results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
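        # keep only the requested region, and only week endings up to the
        # hard-coded analysis end date (2022-01-01)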
        if (elem[0] == region) & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(project_bucket,
                                         results_stress_test_by_sim_s3_pickle_path.format(run_id,
                                                                                          sim_index,
                                                                                          sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][['TradingRegion', 'SettlementDateTime',
                                                                'Spot Price', 'adjusted price', 'Adjusted EAR Cost']]
            df_tmp['Percentile'] = p
            df_tmp['PercentileBase'] = elem[0]
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue

    write_pickle_to_s3(df_hh_traces, project_bucket,
                       results_stress_test_hh_traces_s3_pickle_path.format(run_id, region))
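# The S3 pickle helpers used throughout these snippets (read_pickle_from_s3,
# write_pickle_to_s3) are defined elsewhere in the project. A minimal sketch of
# what they might look like, assuming plain boto3 + pickle with no retries or
# compression (an assumption, not the project's actual implementation):
import pickle

import boto3


def read_pickle_from_s3(bucket, key):
    """Download an object from S3 and unpickle it."""
    obj = boto3.client('s3').get_object(Bucket=bucket, Key=key)
    return pickle.loads(obj['Body'].read())


def write_pickle_to_s3(data, bucket, key):
    """Pickle an object and upload it to S3."""
    boto3.resource('s3').Object(bucket, key).put(Body=pickle.dumps(data))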
def simulate_demand_profile(key_name):
    region = key_name.split('/')[1]
    distributor = key_name.split('/')[2].split('.')[0]
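    # e.g. a key_name like 'meter_data/VIC/NBE_VIC.pkl' (illustrative only)
    # would give region='VIC' and distributor='NBE_VIC'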

    # get reference day parameters from Spot Simulations
    # object_key = spot_simulation_input_extra_info.format(run_id)
    # s3 = boto3.client('s3')
    # obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    # data = obj['Body'].read()
    # extra_sim_mapping = pd.read_excel(io.BytesIO(data))
    # # extra_sim_mapping = pd.read_excel('../input/mapping.xlsx')
    # extra_sim_mapping['original_sim'] = (extra_sim_mapping[1] / 9).astype('int')
    # extra_sims = extra_sim_mapping[['original_sim', 3]].to_dict(orient='split')['data']
    params = read_pickle_from_s3(bucket_spot_simulation,
                                 parameters_for_batch_v2.format(run_id))
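    # params (hedged reconstruction from the loop below): a dict keyed by
    # simulation index, where each value is a list of weeks and each week is a
    # list of day dicts carrying 'sim_date' and 'ref_date', e.g.
    #   {0: [[{'sim_date': date(2021, 1, 4), 'ref_date': date(2016, 1, 4)}, ...], ...], ...}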

    meter_data_dic = read_pickle_from_s3(
        bucket_nbe, meter_data_pickle_path.format(region, distributor))
    for sim_i in params.keys():
        sim_meter_data = dict()
        # first 209 weeks in simulation: to 2024-1-2
        for week in params[sim_i][:209]:
            for day in week:
                sim_day = day['sim_date']
                ref_day = day['ref_date']
                sim_meter_data[sim_day] = meter_data_dic[ref_day]
        write_pickle_to_s3(
            sim_meter_data, bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(
                run_id, sim_i, distributor))
        print(sim_i)
def get_hh_traces(run_id, job_id):
    mapping_info = read_pickle_from_s3(
        bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == 'GrandTotal') & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_simulation_s3_pickle_path.format(
                    run_id, job_id, sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Swap Hedged Qty (MWh)',
                                'Cap Hedged Qty (MWh)', 'Customer Net MWh',
                                'Spot Price', 'Total Cost ($)'
                            ]]
            df_tmp_grandtotal = df_tmp.groupby(['SettlementDateTime']).sum()
            df_tmp_grandtotal.reset_index(inplace=True)
            df_tmp_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
            df_tmp.reset_index(inplace=True, drop=True)
            df_tmp = df_tmp.append(df_tmp_grandtotal).reset_index(drop=True)
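            # convert half-hourly energy (MWh) to average demand over the half hour (MW): multiply by 2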
            df_tmp['Swap Hedged Qty (MW)'] = df_tmp['Swap Hedged Qty (MWh)'] * 2
            df_tmp['Cap Hedged Qty (MW)'] = df_tmp['Cap Hedged Qty (MWh)'] * 2
            df_tmp['Customer Net MW'] = df_tmp['Customer Net MWh'] * 2
            df_tmp = df_tmp.drop(columns=[
                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                'Customer Net MWh'
            ])
            df_tmp['Percentile'] = p
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    df_hh_traces['Spot Run No.'] = run_id
    df_hh_traces['Job No.'] = job_id
    # df_hh_traces.to_csv('HH_Simulation_Traces_{}.csv'.format(run_id))
    write_pickle_to_s3(
        df_hh_traces, bucket_nbe,
        results_EAR_hh_traces_s3_pickle_path.format(run_id, job_id))
def get_output_stress_test(run_id, job_id, sim_num):
    # output by simulation
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
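        # only the first 900 simulations have their own summaries; beyond that,
        # summaries exist only for every 9th index (900, 909, 918, ...)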
        if i < 900:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, i))
        else:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_stress_test_summary_by_sim_s3_pickle_path.format(run_id,
                                                                                                  900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)

    # output by percentile, normal percentile format, and save the mapping of percentile & sim_index
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'FourWeekBlocks',
                                'Adjusted EAR Cost']].groupby(['TradingRegion',
                                                               'FourWeekBlocks']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile_stress_test(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(df_percentile_lst,
                                                     columns=['TradingRegion', 'FourWeekBlocks', 'Percentile',
                                                              'Adjusted EAR Cost',
                                                              'SimNo. (based on Adjusted EAR Cost)'])
    df_percentile_update = df_percentile_update.rename(columns={'Adjusted EAR Cost': 'Adjusted Wholesale Margin',
                                                                'SimNo. (based on Adjusted EAR Cost)':
                                                                    'SimNo. (based on Adjusted Wholesale Margin)'})
    mapping_info = df_percentile_update[['TradingRegion', 'FourWeekBlocks', 'Percentile',
                                         'SimNo. (based on Adjusted Wholesale Margin)']].to_dict(orient='split')['data']
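    # each mapping_info row is a list: [TradingRegion, FourWeekBlocks, Percentile,
    # SimNo (based on Adjusted Wholesale Margin)]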
    write_pickle_to_s3(mapping_info, bucket_nbe, results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    print('mapping information saved.')

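    # duplicate_percentile_for_pbi_stress_test (defined elsewhere) appears to expand
    # the percentile records for the Power BI output; p_position=2 is the position of
    # the Percentile field within each record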
    new_output = duplicate_percentile_for_pbi_stress_test(df_percentile_lst, p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(new_output,
                                                  columns=['TradingRegion', 'FourWeeksBlocks_WeekEnding', 'Percentile',
                                                           'Adjusted Wholesale Margin',
                                                           'SimNo. (based on Adjusted Wholesale Margin)'])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    # df_percentile_pbi.to_excel('NBE_StressTest_Output_by_PBI_percentiles_{}.xlsx'.format(run_id))
    df_percentile_pbi.reset_index(inplace=True)
    df_percentile_pbi = df_percentile_pbi.rename(columns={'index': 'Case'})
    df_percentile_pbi.to_csv('EAR_StressTest_Output_by_PBI_percentiles_{}_{}.csv'.format(job_id, run_id), index=False)
def get_simulations(station_name, hist_start_date, hist_end_date,
                    ref_start_date, ref_end_date):
    weather_data = read_pickle_from_s3(
        bucket_spot_simulation, weather_data_path.format(station_name))
    hist_weather_data = dict()
    for k, v in weather_data.items():
        if (k >= hist_start_date) & (k < hist_end_date):
            hist_weather_data[k] = v
    sim_weather = dict()  # now it's actual weather
    for k, v in weather_data.items():
        if (k >= ref_start_date) & (k < ref_end_date):
            sim_weather[k] = v
    print("Weather 'simulation' finished. Weather station: {}. {} ~ {}".
          format(station_name, ref_start_date, ref_end_date))
    return hist_weather_data, sim_weather
def stress_test_summary(run_id, sim_index):
    df_raw = read_pickle_from_s3(bucket_nbe,
                                 results_stress_test_by_sim_s3_pickle_path.format(run_id, sim_index, sim_index))
    df = df_raw[['TradingRegion', 'SettlementDateTime', 'Adjusted EAR Cost']].copy()
    df['WeekEnding'] = df['SettlementDateTime'].apply(get_week_ending)
    week_ending_init = df['WeekEnding'].iloc[0]
    df['FourWeekBlocks'] = df['WeekEnding'].apply(get_four_week_blocks, args=(week_ending_init, ))
    df_summarized = df[['TradingRegion', 'Adjusted EAR Cost',
                        'FourWeekBlocks']].groupby(['TradingRegion', 'FourWeekBlocks']).sum()
    df_grandtotal = df[['FourWeekBlocks', 'Adjusted EAR Cost']].groupby(['FourWeekBlocks']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    write_pickle_to_s3(df_summarized,
                       bucket_nbe,
                       results_stress_test_summary_by_sim_s3_pickle_path.format(run_id, sim_index))
    return df_summarized
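# get_week_ending and get_four_week_blocks are project helpers not shown in
# this snippet: the summary above relies on get_week_ending mapping each
# half-hourly settlement timestamp to a weekly ending date, and on
# get_four_week_blocks labelling each week ending relative to the first one.
# A minimal sketch of the first helper, assuming weeks end on a Sunday (an
# assumption, not the original implementation):
from datetime import timedelta


def get_week_ending(settlement_datetime, week_ends_on=6):
    # settlement timestamps mark the end of a half hour, so step back 30
    # minutes before taking the calendar date, then roll forward to the next
    # day whose weekday() equals week_ends_on (6 = Sunday)
    d = (settlement_datetime - timedelta(minutes=30)).date()
    return d + timedelta(days=(week_ends_on - d.weekday()) % 7)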
    def get_all_reference_day(self):
        """
        :return:
        """
        # get the simulated weather data
        weather = self.get_simulations(self.weather_station,
                                       self.hist_start_date,
                                       self.hist_end_date, self.ref_start_date,
                                       self.ref_end_date)

        ref_history_weather = weather[0]
        simulated_weather = weather[1]
        # simulated maximum temperature
        sim_max_temperature = dict()
        for day in simulated_weather.keys():
            sim_max_temperature[day] = simulated_weather.get(day)[0]
        # simulated minimum temperature
        sim_min_temperature = dict()
        for day in simulated_weather.keys():
            sim_min_temperature[day] = simulated_weather.get(day)[1]
        reference_public_holiday = read_pickle_from_s3(
            bucket_spot_simulation, public_holiday_path.format(self.region))
        # construct the DataFrame of simulation days and their reference days
        sim_days_df = pd.DataFrame(
            list(sim_max_temperature.keys()),
            columns=['Date'])  # create DataFrame using the dates
        sim_days_df['Date'] = pd.to_datetime(sim_days_df['Date'],
                                             errors='coerce')
        sim_days_df['Max'] = list(sim_max_temperature.values())
        sim_days_df['BucketNo'] = sim_days_df['Max'].apply(temperature_bucket)
        sim_days_df['Season Type'] = sim_days_df['Date'].apply(season_type)
        sim_days_df['Day Type'] = sim_days_df['Date'].apply(
            day_type, args=(reference_public_holiday, ))
        buckets = get_hist_temp_bucket(ref_history_weather,
                                       reference_public_holiday, 'max')

        sim_days_df['Reference Days'] = sim_days_df.apply(find_the_bucket,
                                                          axis=1,
                                                          args=(buckets, ))

        return sim_days_df.drop(
            columns=['Max', 'BucketNo', 'Season Type', 'Day Type'])
def get_output(run_id, job_id, sim_num):
    # output by simulation - weekly/monthly/quarterly average price
    for p in ['week', 'month', 'quarter']:
        print("loading data of all simulations by {}.".format(p))
        all_sim_lst = []
        for i in range(sim_num):
            if i < 900:
                tmp_lst = \
                    read_pickle_from_s3(bucket_nbe,
                                        results_avg_price_by_profile_by_sim_path.format(p, job_id, run_id, i))
            else:
                tmp_lst = \
                    read_pickle_from_s3(bucket_nbe,
                                        results_avg_price_by_profile_by_sim_path.format(p, job_id, run_id,
                                                                                        900 + (i - 900) * 9))
            all_sim_lst += tmp_lst
            print(i)
        col_period = 'WeekEnding' if p == 'week' else 'PeriodEnding'
        df_output = pd.DataFrame(all_sim_lst, columns=[col_period, 'TradingRegion', 'SimNo', 'Profile',
                                                       'Average Spot Price', 'Average Cap Payouts', 'NoOfHour',
                                                       'ProfileID', 'RegionID'])
        df_output['Profile'] = df_output['Profile'].apply(remove_profile_region_suffix)
        df_output['Spot Run No.'] = run_id
        df_output['Job No.'] = job_id
        csv_buffer = StringIO()
        df_output.to_csv(csv_buffer, index=False)
        s3_resource = boto3.resource('s3')
        s3_resource.Object(bucket_nbe,
                           results_avg_price_by_profile_by_sim_csv_path.format(job_id,
                                                                               run_id,
                                                                               p,
                                                                               job_id,
                                                                               run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - weekly
    print("loading data of all simulations by week.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = \
                read_pickle_from_s3(bucket_nbe,
                                    results_EAR_week_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = \
                read_pickle_from_s3(bucket_nbe,
                                    results_EAR_week_summary_by_simulation_s3_pickle_path.format(job_id,
                                                                                                 run_id,
                                                                                                 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)
    df_all_sim['Spot Run No.'] = run_id
    df_all_sim['Job No.'] = job_id
    # EAR_Output_by_simulations_by_week.csv
    df_all_sim_week_complete = df_all_sim
    df_all_sim_week_complete.reset_index(inplace=True)
    df_all_sim_week_complete = df_all_sim_week_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_week_complete.format(job_id,
                                                              run_id,
                                                              job_id,
                                                              run_id)).put(Body=csv_buffer.getvalue())
    # EAR_Output_by_sim_by_week_for_CFD_impact.csv
    df_all_sim_week = df_all_sim[
        ['TradingRegion', 'WeekEnding', 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin', 'SimNo',
         'Spot Run No.', 'Job No.']]
    df_all_sim_week.reset_index(inplace=True)
    df_all_sim_week = df_all_sim_week.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_week.format(job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - monthly
    print("loading data of all simulations by month.")
    df_all_sim_month = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_mth_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_mth_summary_by_simulation_s3_pickle_path.format(job_id,
                                                                                                     run_id,
                                                                                                     900+(i - 900) * 9))
        df_all_sim_month = df_all_sim_month.append(df_tmp)
        print(i)
    df_all_sim_month['Spot Run No.'] = run_id
    df_all_sim_month['Job No.'] = job_id
    # EAR_Output_by_simulations_by_month.csv
    df_all_sim_month_complete = df_all_sim_month
    df_all_sim_month_complete.reset_index(inplace=True)
    df_all_sim_month_complete = df_all_sim_month_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_month_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_month_complete.format(job_id,
                                                               run_id,
                                                               job_id,
                                                               run_id)).put(Body=csv_buffer.getvalue())
    # EAR_Output_by_sim_by_month_for_CFD_impact.csv
    df_all_sim_month.reset_index(inplace=True)
    df_all_sim_month = df_all_sim_month.rename(columns={'index': 'Case'})
    df_all_sim_month = df_all_sim_month[
        ['Case', 'TradingRegion', 'MonthEnding', 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin',
         'SimNo', 'Spot Run No.', 'Job No.']]
    csv_buffer = StringIO()
    df_all_sim_month.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_month.format(job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - quarterly
    print("loading data of all simulations by quarter.")
    df_all_sim_qtr = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(bucket_nbe,
                                         results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(job_id,
                                                                                                     run_id,
                                                                                                     900+(i - 900) * 9))
        df_all_sim_qtr = df_all_sim_qtr.append(df_tmp)
        print(i)
    df_all_sim_qtr['Spot Run No.'] = run_id
    df_all_sim_qtr['Job No.'] = job_id
    # EAR_Output_by_simulations_by_quarter.csv
    df_all_sim_quarter_complete = df_all_sim_qtr
    df_all_sim_quarter_complete.reset_index(inplace=True)
    df_all_sim_quarter_complete = df_all_sim_quarter_complete.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_quarter_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_quarter_complete.format(job_id,
                                                                 run_id,
                                                                 job_id,
                                                                 run_id)).put(Body=csv_buffer.getvalue())
    # EAR_Output_by_sim_by_quarter_for_CFD_impact.csv
    df_all_sim_qtr.reset_index(inplace=True)
    df_all_sim_qtr = df_all_sim_qtr.rename(columns={'index': 'Case'})
    df_all_sim_qtr = df_all_sim_qtr[
        ['Case', 'TradingRegion', 'QuarterEnding', 'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin',
         'SimNo', 'Spot Run No.', 'Job No.']]
    csv_buffer = StringIO()
    df_all_sim_qtr.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_by_sim_by_quarter.format(job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
                                'Cap Premium Cost', 'Total Cost (Incl Cap)', 'Transfer Cost',
                                'Wholesale Margin']].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(df_percentile_lst,
                                                     columns=['TradingRegion', 'WeekEnding', 'Percentile',
                                                              'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                                              'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
                                                              'Total Cost (excl GST)',
                                                              'Cap Premium Cost', 'Total Cost (Incl Cap)',
                                                              'Transfer Cost', 'Wholesale Margin',
                                                              'SimNo. (based on Wholesale Margin)'])
    mapping_info = df_percentile_update[['TradingRegion', 'WeekEnding',
                                         'Percentile', 'SimNo. (based on Wholesale Margin)']].to_dict(orient='split')[
        'data']
    write_pickle_to_s3(mapping_info, bucket_nbe, results_EAR_summary_mapping_s3_pickle_path.format(job_id, run_id))
    print('mapping information saved.')

    # EAR_Output_by_normal_percentiles.csv
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[['TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
                                'Cap Premium Cost', 'Total Cost (Incl Cap)',
                                'Transfer Cost', 'Wholesale Margin']].groupby(['TradingRegion', 'WeekEnding']).quantile(
        percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(df_percentile_lst,
                                                     columns=['TradingRegion', 'WeekEnding', 'Percentile',
                                                              'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                                              'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd',
                                                              'Total Cost (excl GST)',
                                                              'Cap Premium Cost', 'Total Cost (Incl Cap)',
                                                              'Transfer Cost', 'Wholesale Margin',
                                                              'SimNo. (based on Wholesale Margin)'])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update.reset_index(inplace=True)
    df_percentile_update = df_percentile_update.rename(columns={'index': 'Case'})
    # df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(lambda x: x.year)
    # df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(bucket_nbe,
                       results_EAR_normal_percentiles.format(job_id, run_id, job_id,
                                                             run_id)).put(Body=csv_buffer.getvalue())

    # output by risk percentiles for PBI
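# capture_sim_no_for_percentile (and its *_stress_test variant) are defined
# elsewhere in the project. The calls above only rely on it returning, for each
# (TradingRegion, WeekEnding, Percentile) row of the quantile table, that row's
# values plus the SimNo whose target metric lies closest to the quantile value.
# A hedged sketch of that idea, not the original implementation:
def capture_sim_no_for_percentile(df_all_sim, df_percentile,
                                  value_col='Wholesale Margin'):
    records = []
    for _, row in df_percentile.iterrows():
        group = df_all_sim[(df_all_sim['TradingRegion'] == row['TradingRegion'])
                           & (df_all_sim['WeekEnding'] == row['WeekEnding'])]
        # pick the simulation whose metric is nearest to the percentile value
        nearest = (group[value_col] - row[value_col]).abs().argmin()
        records.append(list(row) + [group.iloc[nearest]['SimNo']])
    return records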
def get_output(run_id, job_id, sim_num):
    print("loading data of all simulations.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(
                    run_id, job_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(
                    run_id, job_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)
    # # output by simulation
    # df_all_sim['Spot Run No.'] = run_id
    # df_all_sim.to_excel('NBE_EAR_Output_by_simulations_{}.xlsx'.format(run_id))

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [
        0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1
    ]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim,
                                                      df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
            'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
            'Cap Premium Cost', 'Total Cost ($)',
            'SimNo. (based on Total Cost)'
        ])
    mapping_info = df_percentile_update[[
        'TradingRegion', 'WeekEnding', 'Percentile',
        'SimNo. (based on Total Cost)'
    ]].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    print('mapping information saved.')

    # output by normal percentiles
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim,
                                                      df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
            'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
            'Cap Premium Cost', 'Total Cost ($)',
            'SimNo. (based on Total Cost)'
        ])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(
        lambda x: x.year)
    df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(
        lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_normal_percentiles.format(
            run_id, job_id, run_id, job_id)).put(Body=csv_buffer.getvalue())

    # output by percentile for PBI
    percentile_list_risk = [0, 0.01, 0.02, 0.03, 0.05]
    df_percentile_risk = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list_risk)
    df_percentile_risk.reset_index(inplace=True)
    df_percentile_risk = df_percentile_risk.rename(
        columns={'level_2': 'Percentile'})
    df_percentile_risk_lst = capture_sim_no_for_percentile(
        df_all_sim, df_percentile_risk)
    new_output = duplicate_percentile_for_pbi(df_percentile_risk_lst,
                                              p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(
        new_output,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
            'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
            'Cap Premium Cost', 'Total Cost ($)',
            'SimNo. (based on Total Cost)'
        ])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    df_percentile_pbi['Year'] = df_percentile_pbi['WeekEnding'].apply(
        lambda x: x.year)
    df_percentile_pbi['Month'] = df_percentile_pbi['WeekEnding'].apply(
        lambda x: x.month)
    # df_percentile_pbi.to_excel('NBE_EAR_Output_by_PBI_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_pbi.to_csv(results_EAR_PBI_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_pbi.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_PBI_percentiles.format(
            run_id, job_id, run_id, job_id)).put(Body=csv_buffer.getvalue())
def load_calculate_summarize(run_id, job_id, date_input, sim_index, start_year,
                             start_month, start_day, end_year, end_month,
                             end_day):
    """

    :param run_id:
    :param job_id:
    :param date_input:
    :param sim_index:
    :param start_year:
    :param start_month:
    :param start_day:
    :param end_year:
    :param end_month:
    :param end_day:
    :return:
    """
    start_time = time.time()
    print('Job {} using Spot Run {} starting... SimNo: {}'.format(
        job_id, run_id, sim_index))
    start_date = date(start_year, start_month, start_day)
    end_date = date(end_year, end_month, end_day)  # excl.

    # read deal position data
    df_all = read_pickle_from_s3(
        bucket_nbe, deal_capture_converted_path.format(job_id, date_input))
    states = sorted(list(set(
        df_all['TradingRegion'])))  # states = ['NSW1', 'QLD1', 'SA1', 'VIC1']

    # read simulated spot price data
    df_sp = pd.read_parquet(
        f"s3://{bucket_nbe}/{spot_price_by_sim_parquet_path.format(run_id, sim_index)}"
    )
    print('Spot price from {} SimNo. {} loaded.'.format(run_id, sim_index))
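    # df_sp is laid out with a 'Half Hour Starting' timestamp column plus one
    # simulated spot price column per trading region (e.g. 'NSW1', 'VIC1')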
    df_sp['Date'] = df_sp['Half Hour Starting'].apply(lambda x: x.date())
    df_sp = df_sp[(start_date <= df_sp['Date']) & (df_sp['Date'] < end_date)]
    if len(pd.concat([df_sp[state] for state in states])) != len(df_all):
        raise ValueError(
            f"spot price data size: {len(pd.concat([df_sp[state] for state in states]))}, expected: {len(df_all)}"
        )

    # sort by region to make sure spot price data and customer data are consistent
    df_all = df_all.sort_values(
        by=['TradingRegion', 'SettlementDateTime']).reset_index(drop=True)
    df_all['Spot Price'] = pd.concat([df_sp[state] for state in states],
                                     ignore_index=True)

    # read simulated customer load data
    load_data = {}
    for state in states:
        state_load_all = read_pickle_from_s3(
            bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(
                run_id, sim_index // 9, 'NBE_{}'.format(state[:-1])))
        load_data[state] = pd.concat([
            state_load_all[key]['GRID_USAGE']
            for key in state_load_all if start_date <= key < end_date
        ],
                                     ignore_index=True)
    print('Customer data from {} SimNo. {} loaded.'.format(run_id, sim_index))
    if len(pd.concat([load_data[state] for state in states])) != len(df_all):
        raise ValueError(
            f"load data size: {len(pd.concat([load_data[state] for state in states]))}, expected:{len(df_all)}"
        )
    df_all['Customer Net MWh'] = pd.concat(
        [load_data[state] for state in states], ignore_index=True)
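    # negate the simulated load (sign convention for 'Customer Net MWh')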
    df_all['Customer Net MWh'] = df_all['Customer Net MWh'].apply(lambda x: -x)

    # calculate earning at risk and output
    df = calculate_earning_at_risk(df_all)
    df_output = df[[
        'TradingRegion', 'SettlementDateTime', 'Swap Premium',
        'Swap Hedged Qty (MWh)', 'Swap Weighted Strike Price', 'Cap Premium',
        'Cap Hedged Qty (MWh)', 'Cap Weighted Strike Price', 'Spot Price',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].copy()
    print('Calculation finished. Uploading... {} SimNo. {}'.format(
        run_id, sim_index))
    # to keep a copy of half hour resolution raw data.
    write_pickle_to_s3(
        df_output, bucket_nbe,
        results_EAR_simulation_s3_pickle_path.format(run_id, job_id,
                                                     sim_index))

    # weekly summary
    print('Summarising the data into weekly resolution... {} SimNo. {}'.format(
        run_id, sim_index))
    df_output['WeekEnding'] = df_output['SettlementDateTime'].apply(
        get_week_ending)  # get the week ending date
    # sum by region by week ending
    df_summarized = df_output[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).sum()
    # all regions' sum by week ending
    df_grandtotal = df_output[[
        'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['WeekEnding']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    print('Uploading weekly summary... {} SimNo. {}'.format(run_id, sim_index))
    write_pickle_to_s3(
        df_summarized, bucket_nbe,
        results_EAR_summary_by_simulation_s3_pickle_path.format(
            run_id, job_id, sim_index))
    end_time = time.time()
    print("Processing time {} SimNo. {} : {} seconds.".format(
        run_id, sim_index, end_time - start_time))
def get_hh_traces(run_id, job_id):
    """
    mapping_info is a list of percentile-to-simulation records; each record holds:

    TradingRegion -> str  e.g. 'GrandTotal', 'VIC1', etc
    WeekEnding -> datetime.date  e.g. datetime.date(2021,1,9)
    Percentile -> float   e.g. 0.05
    Sim No. -> float   e.g. 269(.0)

    :param run_id:
    :param job_id:
    :return:
    """
    mapping_info = read_pickle_from_s3(
        bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(job_id, run_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == 'GrandTotal') & (elem[1] <= analysis_end_date):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_simulation_s3_pickle_path.format(
                    run_id, job_id, sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Swap Hedged Qty (MWh)',
                                'Cap Hedged Qty (MWh)', 'Customer Net MWh',
                                'Spot Price', 'Total Cost (excl GST)',
                                'Total Cost (Incl Cap)'
                            ]]
            df_tmp_grandtotal = df_tmp.groupby(['SettlementDateTime']).sum()
            df_tmp_grandtotal.reset_index(inplace=True)
            df_tmp_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
            df_tmp.reset_index(inplace=True, drop=True)
            df_tmp = df_tmp.append(df_tmp_grandtotal).reset_index(drop=True)
            df_tmp['Swap Hedged Qty (MW)'] = df_tmp['Swap Hedged Qty (MWh)'] * 2
            df_tmp['Cap Hedged Qty (MW)'] = df_tmp['Cap Hedged Qty (MWh)'] * 2
            df_tmp['Customer Net MW'] = df_tmp['Customer Net MWh'] * 2
            df_tmp = df_tmp.drop(columns=[
                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                'Customer Net MWh'
            ])
            df_tmp['Percentile'] = p
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    df_hh_traces['Spot Run No.'] = run_id
    df_hh_traces['Job No.'] = job_id
    df_hh_traces.reset_index(inplace=True)
    df_hh_traces = df_hh_traces.rename(columns={'index': 'Case'})
    # df_hh_traces.csv('HH_Simulation_Traces_{}.csv'.format(run_id))
    # write_pickle_to_s3(df_hh_traces, project_bucket, results_EAR_hh_traces_s3_pickle_path.format(run_id, job_id))
    csv_buffer = StringIO()
    df_hh_traces.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_hh_traces_s3_pickle_path.format(
            run_id, job_id, job_id, run_id)).put(Body=csv_buffer.getvalue())
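# A hedged sketch of how these pieces might be strung together for one run. The
# identifiers below are placeholders, and the ordering is inferred from the data
# dependencies (get_hh_traces reads the percentile/sim mapping that get_output
# writes); it is not taken from the original project.
if __name__ == '__main__':
    run_id = 'RUN_001'   # hypothetical spot simulation run id
    job_id = 'JOB_001'   # hypothetical EAR job id
    sim_num = 1000       # hypothetical number of simulations

    get_output(run_id, job_id, sim_num)   # weekly/monthly/quarterly summaries + percentile mapping
    get_hh_traces(run_id, job_id)         # half-hourly traces for the mapped simulations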