def get_hh_traces_stress_test(run_id, region):
    mapping_info = read_pickle_from_s3(
        project_bucket,
        results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == region) & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                project_bucket,
                results_stress_test_by_sim_s3_pickle_path.format(
                    run_id, sim_index, sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(
                    lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Spot Price', 'adjusted price',
                                'Adjusted EAR Cost'
                            ]]
            df_tmp['Percentile'] = p
            df_tmp['PercentileBase'] = elem[0]
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    write_pickle_to_s3(
        df_hh_traces, project_bucket,
        results_stress_test_hh_traces_s3_pickle_path.format(run_id, region))

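# read_pickle_from_s3 and write_pickle_to_s3 are imported from elsewhere in the
# project and are not shown in this module. As a rough sketch of what they are
# assumed to do (boto3 plus the standard pickle module; names and behaviour are
# inferred here, not taken from the real helpers), something like:
#
#     import pickle
#     import boto3
#
#     def read_pickle_from_s3(bucket, key):
#         """Download an S3 object and unpickle it."""
#         body = boto3.client('s3').get_object(Bucket=bucket, Key=key)['Body'].read()
#         return pickle.loads(body)
#
#     def write_pickle_to_s3(obj, bucket, key):
#         """Pickle an object and upload it to S3."""
#         boto3.client('s3').put_object(Bucket=bucket, Key=key, Body=pickle.dumps(obj))
#
# The sketch is kept commented out so it does not shadow the real imports.
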
def simulate_demand_profile(key_name):
    region = key_name.split('/')[1]
    distributor = key_name.split('/')[2].split('.')[0]
    # get reference day parameters from Spot Simulations
    # object_key = spot_simulation_input_extra_info.format(run_id)
    # s3 = boto3.client('s3')
    # obj = s3.get_object(Bucket=bucket_nbe, Key=object_key)
    # data = obj['Body'].read()
    # extra_sim_mapping = pd.read_excel(io.BytesIO(data))
    # # extra_sim_mapping = pd.read_excel('../input/mapping.xlsx')
    # extra_sim_mapping['original_sim'] = (extra_sim_mapping[1] / 9).astype('int')
    # extra_sims = extra_sim_mapping[['original_sim', 3]].to_dict(orient='split')['data']
    params = read_pickle_from_s3(bucket_spot_simulation,
                                 parameters_for_batch_v2.format(run_id))
    meter_data_dic = read_pickle_from_s3(
        bucket_nbe, meter_data_pickle_path.format(region, distributor))
    for sim_i in params.keys():
        sim_meter_data = dict()
        for week in params[sim_i][:209]:  # first 209 weeks in simulation: to 2024-1-2
            for day in week:
                sim_day = day['sim_date']
                ref_day = day['ref_date']
                sim_meter_data[sim_day] = meter_data_dic[ref_day]
        write_pickle_to_s3(
            sim_meter_data, bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(run_id, sim_i,
                                                        distributor))
        print(sim_i)

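# Example of the key layout assumed by the split logic above (the key prefix is
# hypothetical): a key such as 'meter_data/VIC1/NBE_VIC.pickle' yields
# region='VIC1' and distributor='NBE_VIC'.
#
#     simulate_demand_profile('meter_data/VIC1/NBE_VIC.pickle')
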
def get_hh_traces(run_id, job_id):
    mapping_info = read_pickle_from_s3(
        bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == 'GrandTotal') & (elem[1] <= datetime(2022, 1, 1).date()):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_simulation_s3_pickle_path.format(
                    run_id, job_id, sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(
                    lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Spot Price',
                                'Total Cost ($)'
                            ]]
            df_tmp_grandtotal = df_tmp.groupby(['SettlementDateTime']).sum()
            df_tmp_grandtotal.reset_index(inplace=True)
            df_tmp_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
            df_tmp.reset_index(inplace=True, drop=True)
            df_tmp = df_tmp.append(df_tmp_grandtotal).reset_index(drop=True)
            df_tmp['Swap Hedged Qty (MW)'] = df_tmp['Swap Hedged Qty (MWh)'] * 2
            df_tmp['Cap Hedged Qty (MW)'] = df_tmp['Cap Hedged Qty (MWh)'] * 2
            df_tmp['Customer Net MW'] = df_tmp['Customer Net MWh'] * 2
            df_tmp = df_tmp.drop(columns=[
                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                'Customer Net MWh'
            ])
            df_tmp['Percentile'] = p
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    df_hh_traces['Spot Run No.'] = run_id
    df_hh_traces['Job No.'] = job_id
    # df_hh_traces.to_csv('HH_Simulation_Traces_{}.csv'.format(run_id))
    write_pickle_to_s3(
        df_hh_traces, bucket_nbe,
        results_EAR_hh_traces_s3_pickle_path.format(run_id, job_id))

def get_output_stress_test(run_id, job_id, sim_num):
    # output by simulation
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_stress_test_summary_by_sim_s3_pickle_path.format(
                    run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_stress_test_summary_by_sim_s3_pickle_path.format(
                    run_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)

    # output by percentile, normal percentile format, and save the mapping of
    # percentile & sim_index
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'FourWeekBlocks', 'Adjusted EAR Cost'
    ]].groupby(['TradingRegion', 'FourWeekBlocks']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile_stress_test(
        df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'FourWeekBlocks', 'Percentile',
            'Adjusted EAR Cost', 'SimNo. (based on Adjusted EAR Cost)'
        ])
    df_percentile_update = df_percentile_update.rename(
        columns={
            'Adjusted EAR Cost': 'Adjusted Wholesale Margin',
            'SimNo. (based on Adjusted EAR Cost)':
            'SimNo. (based on Adjusted Wholesale Margin)'
        })
    mapping_info = df_percentile_update[[
        'TradingRegion', 'FourWeekBlocks', 'Percentile',
        'SimNo. (based on Adjusted Wholesale Margin)'
    ]].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_stress_test_summary_mapping_s3_pickle_path.format(run_id))
    print('mapping information saved.')

    new_output = duplicate_percentile_for_pbi_stress_test(df_percentile_lst,
                                                          p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(
        new_output,
        columns=[
            'TradingRegion', 'FourWeeksBlocks_WeekEnding', 'Percentile',
            'Adjusted Wholesale Margin',
            'SimNo. (based on Adjusted Wholesale Margin)'
        ])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    # df_percentile_pbi.to_excel('NBE_StressTest_Output_by_PBI_percentiles_{}.xlsx'.format(run_id))
    df_percentile_pbi.reset_index(inplace=True)
    df_percentile_pbi = df_percentile_pbi.rename(columns={'index': 'Case'})
    df_percentile_pbi.to_csv(
        'EAR_StressTest_Output_by_PBI_percentiles_{}_{}.csv'.format(
            job_id, run_id),
        index=False)

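# The first 900 simulations are stored under their own index; beyond that, only
# every ninth simulation index (900, 909, 918, ...) is stored, which is what the
# 900 + (i - 900) * 9 expression above (and in get_output below) reproduces.
# A small helper capturing that mapping (a refactoring sketch only; the code
# above does not use it) might be:
def stored_sim_index(i):
    """Map a logical simulation number to the index used in the S3 pickle keys."""
    return i if i < 900 else 900 + (i - 900) * 9
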
def get_simulations(station_name, hist_start_date, hist_end_date,
                    ref_start_date, ref_end_date):
    weather_data = read_pickle_from_s3(bucket_spot_simulation,
                                       weather_data_path.format(station_name))
    hist_weather_data = dict()
    for k, v in weather_data.items():
        if (k >= hist_start_date) & (k < hist_end_date):
            hist_weather_data[k] = v
    sim_weather = dict()  # now it's actual weather
    for k, v in weather_data.items():
        if (k >= ref_start_date) & (k < ref_end_date):
            sim_weather[k] = v
    print("Weather 'simulation' finished. Weather station: {}. {} ~ {}".format(
        station_name, ref_start_date, ref_end_date))
    return hist_weather_data, sim_weather

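# Example call (the station name and date windows are hypothetical):
#
#     hist_weather, ref_weather = get_simulations(
#         'MELBOURNE_AIRPORT',
#         date(2010, 1, 1), date(2020, 1, 1),   # historical window
#         date(2020, 1, 1), date(2021, 1, 1))   # reference ("simulation") window
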
def stress_test_summary(run_id, sim_index):
    df_raw = read_pickle_from_s3(
        bucket_nbe,
        results_stress_test_by_sim_s3_pickle_path.format(run_id, sim_index,
                                                         sim_index))
    df = df_raw[['TradingRegion', 'SettlementDateTime', 'Adjusted EAR Cost']]
    df['WeekEnding'] = df['SettlementDateTime'].apply(get_week_ending)
    week_ending_init = df['WeekEnding'].iloc[0]
    df['FourWeekBlocks'] = df['WeekEnding'].apply(get_four_week_blocks,
                                                  args=(week_ending_init, ))
    df_summarized = df[[
        'TradingRegion', 'Adjusted EAR Cost', 'FourWeekBlocks'
    ]].groupby(['TradingRegion', 'FourWeekBlocks']).sum()
    df_grandtotal = df[['FourWeekBlocks', 'Adjusted EAR Cost'
                        ]].groupby(['FourWeekBlocks']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    write_pickle_to_s3(
        df_summarized, bucket_nbe,
        results_stress_test_summary_by_sim_s3_pickle_path.format(
            run_id, sim_index))
    return df_summarized

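# get_week_ending and get_four_week_blocks are defined elsewhere in the project.
# A plausible sketch of their behaviour, assuming half-hour-ending settlement
# timestamps and weeks ending on Saturday (consistent with the
# datetime.date(2021, 1, 9) example in get_hh_traces' docstring below, but an
# assumption, not the real definitions):
def get_week_ending_sketch(settlement_dt, week_end_weekday=5):
    """Return the week-ending date for a half-hour-ending settlement timestamp."""
    d = (settlement_dt.to_pydatetime() - timedelta(minutes=30)).date()
    return d + timedelta(days=(week_end_weekday - d.weekday()) % 7)


def get_four_week_blocks_sketch(week_ending, week_ending_init):
    """Group consecutive week-ending dates into four-week blocks (0, 1, 2, ...)."""
    return (week_ending - week_ending_init).days // 28
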
def get_all_reference_day(self):
    """
    Build the DataFrame of simulation days and their reference days.

    :return: DataFrame with 'Date' and 'Reference Days' columns.
    """
    # get the simulated weather data
    weather = self.get_simulations(self.weather_station, self.hist_start_date,
                                   self.hist_end_date, self.ref_start_date,
                                   self.ref_end_date)
    ref_history_weather = weather[0]
    simulated_weather = weather[1]
    # simulated maximum temperature
    sim_max_temperature = dict()
    for day in simulated_weather.keys():
        sim_max_temperature[day] = simulated_weather.get(day)[0]
    # simulated minimum temperature
    sim_min_temperature = dict()
    for day in simulated_weather.keys():
        sim_min_temperature[day] = simulated_weather.get(day)[1]
    reference_public_holiday = read_pickle_from_s3(
        bucket_spot_simulation, public_holiday_path.format(self.region))
    # construct the DataFrame of simulation days and their reference days
    sim_days_df = pd.DataFrame(
        list(sim_max_temperature.keys()),
        columns=['Date'])  # create DataFrame using the dates
    sim_days_df['Date'] = pd.to_datetime(sim_days_df['Date'], errors='coerce')
    sim_days_df['Max'] = list(sim_max_temperature.values())
    sim_days_df['BucketNo'] = sim_days_df['Max'].apply(temperature_bucket)
    sim_days_df['Season Type'] = sim_days_df['Date'].apply(season_type)
    sim_days_df['Day Type'] = sim_days_df['Date'].apply(
        day_type, args=(reference_public_holiday, ))
    buckets = get_hist_temp_bucket(ref_history_weather,
                                   reference_public_holiday, 'max')
    sim_days_df['Reference Days'] = sim_days_df.apply(find_the_bucket,
                                                      axis=1,
                                                      args=(buckets, ))
    return sim_days_df.drop(
        columns=['Max', 'BucketNo', 'Season Type', 'Day Type'])

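# temperature_bucket, season_type, day_type, get_hist_temp_bucket and
# find_the_bucket are imported from elsewhere. As an illustration of the
# temperature-bucketing idea only (the bucket width is an assumption, not the
# real implementation):
def temperature_bucket_sketch(max_temp, width=5.0):
    """Bin a daily maximum temperature into fixed-width bands."""
    return int(max_temp // width)
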
def get_output(run_id, job_id, sim_num):
    # output by simulation - weekly/monthly/quarterly average price
    for p in ['week', 'month', 'quarter']:
        print("loading data of all simulations by {}.".format(p))
        all_sim_lst = []
        for i in range(sim_num):
            if i < 900:
                tmp_lst = read_pickle_from_s3(
                    bucket_nbe,
                    results_avg_price_by_profile_by_sim_path.format(
                        p, job_id, run_id, i))
            else:
                tmp_lst = read_pickle_from_s3(
                    bucket_nbe,
                    results_avg_price_by_profile_by_sim_path.format(
                        p, job_id, run_id, 900 + (i - 900) * 9))
            all_sim_lst += tmp_lst
            print(i)
        col_period = 'WeekEnding' if p == 'week' else 'PeriodEnding'
        df_output = pd.DataFrame(all_sim_lst,
                                 columns=[
                                     col_period, 'TradingRegion', 'SimNo',
                                     'Profile', 'Average Spot Price',
                                     'Average Cap Payouts', 'NoOfHour',
                                     'ProfileID', 'RegionID'
                                 ])
        df_output['Profile'] = df_output['Profile'].apply(
            remove_profile_region_suffix)
        df_output['Spot Run No.'] = df_output.apply(lambda _: run_id, axis=1)
        df_output['Job No.'] = df_output.apply(lambda _: job_id, axis=1)
        csv_buffer = StringIO()
        df_output.to_csv(csv_buffer, index=False)
        s3_resource = boto3.resource('s3')
        s3_resource.Object(
            bucket_nbe,
            results_avg_price_by_profile_by_sim_csv_path.format(
                job_id, run_id, p, job_id,
                run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - weekly
    print("loading data of all simulations by week.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_week_summary_by_simulation_s3_pickle_path.format(
                    job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_week_summary_by_simulation_s3_pickle_path.format(
                    job_id, run_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)
    df_all_sim['Spot Run No.'] = run_id
    df_all_sim['Job No.'] = job_id

    # EAR_Output_by_simulations_by_week.csv
    df_all_sim_week_complete = df_all_sim
    df_all_sim_week_complete.reset_index(inplace=True)
    df_all_sim_week_complete = df_all_sim_week_complete.rename(
        columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_week_complete.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # EAR_Output_by_sim_by_week_for_CFD_impact.csv
    df_all_sim_week = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Total Cost (Incl Cap)', 'Transfer Cost',
        'Wholesale Margin', 'SimNo', 'Spot Run No.', 'Job No.'
    ]]
    df_all_sim_week.reset_index(inplace=True)
    df_all_sim_week = df_all_sim_week.rename(columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_week.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_week.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - monthly
    print("loading data of all simulations by month.")
    df_all_sim_month = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_mth_summary_by_simulation_s3_pickle_path.format(
                    job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_mth_summary_by_simulation_s3_pickle_path.format(
                    job_id, run_id, 900 + (i - 900) * 9))
        df_all_sim_month = df_all_sim_month.append(df_tmp)
        print(i)
    df_all_sim_month['Spot Run No.'] = run_id
    df_all_sim_month['Job No.'] = job_id

    # EAR_Output_by_simulations_by_month.csv
    df_all_sim_month_complete = df_all_sim_month
    df_all_sim_month_complete.reset_index(inplace=True)
    df_all_sim_month_complete = df_all_sim_month_complete.rename(
        columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_month_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_month_complete.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # EAR_Output_by_sim_by_month_for_CFD_impact.csv
    df_all_sim_month.reset_index(inplace=True)
    df_all_sim_month = df_all_sim_month.rename(columns={'index': 'Case'})
    df_all_sim_month = df_all_sim_month[[
        'Case', 'TradingRegion', 'MonthEnding', 'Total Cost (Incl Cap)',
        'Transfer Cost', 'Wholesale Margin', 'SimNo', 'Spot Run No.', 'Job No.'
    ]]
    csv_buffer = StringIO()
    df_all_sim_month.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_month.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by simulation - quarterly
    print("loading data of all simulations by quarter.")
    df_all_sim_qtr = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(
                    job_id, run_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_qtr_summary_by_simulation_s3_pickle_path.format(
                    job_id, run_id, 900 + (i - 900) * 9))
        df_all_sim_qtr = df_all_sim_qtr.append(df_tmp)
        print(i)
    df_all_sim_qtr['Spot Run No.'] = run_id
    df_all_sim_qtr['Job No.'] = job_id

    # EAR_Output_by_simulations_by_quarter.csv
    df_all_sim_quarter_complete = df_all_sim_qtr
    df_all_sim_quarter_complete.reset_index(inplace=True)
    df_all_sim_quarter_complete = df_all_sim_quarter_complete.rename(
        columns={'index': 'Case'})
    csv_buffer = StringIO()
    df_all_sim_quarter_complete.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_quarter_complete.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # EAR_Output_by_sim_by_quarter_for_CFD_impact.csv
    df_all_sim_qtr.reset_index(inplace=True)
    df_all_sim_qtr = df_all_sim_qtr.rename(columns={'index': 'Case'})
    df_all_sim_qtr = df_all_sim_qtr[[
        'Case', 'TradingRegion', 'QuarterEnding', 'Total Cost (Incl Cap)',
        'Transfer Cost', 'Wholesale Margin', 'SimNo', 'Spot Run No.', 'Job No.'
    ]]
    csv_buffer = StringIO()
    df_all_sim_qtr.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_by_sim_by_quarter.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'Total Cost (excl GST)', 'Cap Premium Cost',
        'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh',
            'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
            'Cap Premium Cost', 'Total Cost (Incl Cap)', 'Transfer Cost',
            'Wholesale Margin', 'SimNo. (based on Wholesale Margin)'
        ])
    mapping_info = df_percentile_update[[
        'TradingRegion', 'WeekEnding', 'Percentile',
        'SimNo. (based on Wholesale Margin)'
    ]].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(job_id, run_id))
    print('mapping information saved.')

    # EAR_Output_by_normal_percentiles.csv
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'Total Cost (excl GST)', 'Cap Premium Cost',
        'Total Cost (Incl Cap)', 'Transfer Cost', 'Wholesale Margin'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh',
            'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'Total Cost (excl GST)',
            'Cap Premium Cost', 'Total Cost (Incl Cap)', 'Transfer Cost',
            'Wholesale Margin', 'SimNo. (based on Wholesale Margin)'
        ])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update.reset_index(inplace=True)
    df_percentile_update = df_percentile_update.rename(
        columns={'index': 'Case'})
    # df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(lambda x: x.year)
    # df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_normal_percentiles.format(
            job_id, run_id, job_id, run_id)).put(Body=csv_buffer.getvalue())

    # output by risk percentiles for PBI
    '''
def get_output(run_id, job_id, sim_num):
    print("loading data of all simulations.")
    df_all_sim = pd.DataFrame()
    for i in range(sim_num):
        if i < 900:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(
                    run_id, job_id, i))
        else:
            df_tmp = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_summary_by_simulation_s3_pickle_path.format(
                    run_id, job_id, 900 + (i - 900) * 9))
        df_all_sim = df_all_sim.append(df_tmp)
        print(i)

    # # output by simulation
    # df_all_sim['Spot Run No.'] = run_id
    # df_all_sim.to_excel('NBE_EAR_Output_by_simulations_{}.xlsx'.format(run_id))

    # save the mapping of percentile & sim_index of all percentiles
    percentile_list = [
        0, 0.01, 0.02, 0.03, 0.04, 0.05, 0.25, 0.5, 0.75, 0.95, 1
    ]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh',
            'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost', 'Cap Premium Cost',
            'Total Cost ($)', 'SimNo. (based on Total Cost)'
        ])
    mapping_info = df_percentile_update[[
        'TradingRegion', 'WeekEnding', 'Percentile',
        'SimNo. (based on Total Cost)'
    ]].to_dict(orient='split')['data']
    write_pickle_to_s3(
        mapping_info, bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(run_id, job_id))
    print('mapping information saved.')

    # output by normal percentiles
    percentile_list = [0, 0.05, 0.25, 0.5, 0.75, 0.95, 1]
    df_percentile = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list)
    df_percentile.reset_index(inplace=True)
    df_percentile = df_percentile.rename(columns={'level_2': 'Percentile'})
    df_percentile_lst = capture_sim_no_for_percentile(df_all_sim, df_percentile)
    df_percentile_update = pd.DataFrame.from_records(
        df_percentile_lst,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh',
            'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost', 'Cap Premium Cost',
            'Total Cost ($)', 'SimNo. (based on Total Cost)'
        ])
    df_percentile_update['Spot Run No.'] = run_id
    df_percentile_update['Job No.'] = job_id
    df_percentile_update['Year'] = df_percentile_update['WeekEnding'].apply(
        lambda x: x.year)
    df_percentile_update['Month'] = df_percentile_update['WeekEnding'].apply(
        lambda x: x.month)
    # df_percentile_update.to_excel('NBE_EAR_Output_by_normal_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_update.to_csv(results_EAR_normal_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_update.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_normal_percentiles.format(
            run_id, job_id, run_id, job_id)).put(Body=csv_buffer.getvalue())

    # output by percentile for PBI
    percentile_list_risk = [0, 0.01, 0.02, 0.03, 0.05]
    df_percentile_risk = df_all_sim[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).quantile(percentile_list_risk)
    df_percentile_risk.reset_index(inplace=True)
    df_percentile_risk = df_percentile_risk.rename(
        columns={'level_2': 'Percentile'})
    df_percentile_risk_lst = capture_sim_no_for_percentile(
        df_all_sim, df_percentile_risk)
    new_output = duplicate_percentile_for_pbi(df_percentile_risk_lst,
                                              p_position=2)
    df_percentile_pbi = pd.DataFrame.from_records(
        new_output,
        columns=[
            'TradingRegion', 'WeekEnding', 'Percentile',
            'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)', 'Customer Net MWh',
            'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost', 'Cap Premium Cost',
            'Total Cost ($)', 'SimNo. (based on Total Cost)'
        ])
    df_percentile_pbi['Spot Run No.'] = run_id
    df_percentile_pbi['Job No.'] = job_id
    df_percentile_pbi['Year'] = df_percentile_pbi['WeekEnding'].apply(
        lambda x: x.year)
    df_percentile_pbi['Month'] = df_percentile_pbi['WeekEnding'].apply(
        lambda x: x.month)
    # df_percentile_pbi.to_excel('NBE_EAR_Output_by_PBI_percentiles_{}_{}.xlsx'.format(run_id, job_id))
    # df_percentile_pbi.to_csv(results_EAR_PBI_percentiles.format(run_id, job_id))
    csv_buffer = StringIO()
    df_percentile_pbi.to_csv(csv_buffer)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_PBI_percentiles.format(
            run_id, job_id, run_id, job_id)).put(Body=csv_buffer.getvalue())

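# The StringIO-plus-boto3 upload pattern repeated above could be factored into a
# small helper. A refactoring sketch only (not currently used by the functions
# in this module; it relies on the StringIO and boto3 imports already required
# above):
def write_csv_to_s3(df, bucket, key, index=False):
    """Serialise a DataFrame to CSV in memory and upload it to the given S3 key."""
    buf = StringIO()
    df.to_csv(buf, index=index)
    boto3.resource('s3').Object(bucket, key).put(Body=buf.getvalue())
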
def load_calculate_summarize(run_id, job_id, date_input, sim_index, start_year,
                             start_month, start_day, end_year, end_month,
                             end_day):
    """
    Load deal, spot price and customer load data for one simulation, calculate
    earnings at risk, and upload half-hourly and weekly summaries to S3.

    :param run_id: spot simulation run identifier
    :param job_id: EAR job identifier
    :param date_input: date label of the deal capture extract
    :param sim_index: simulation number to process
    :param start_year: analysis window start (inclusive)
    :param start_month:
    :param start_day:
    :param end_year: analysis window end (exclusive)
    :param end_month:
    :param end_day:
    :return:
    """
    start_time = time.time()
    print('Job {} using Spot Run {} starting... SimNo: {}'.format(
        job_id, run_id, sim_index))
    start_date = date(start_year, start_month, start_day)
    end_date = date(end_year, end_month, end_day)  # excl.

    # read deal position data
    df_all = read_pickle_from_s3(
        bucket_nbe, deal_capture_converted_path.format(job_id, date_input))
    states = sorted(list(set(df_all['TradingRegion'])))
    # states = ['NSW1', 'QLD1', 'SA1', 'VIC1']

    # read simulated spot price data
    df_sp = pd.read_parquet(
        f"s3://{bucket_nbe}/{spot_price_by_sim_parquet_path.format(run_id, sim_index)}"
    )
    print('Spot price from {} SimNo. {} loaded.'.format(run_id, sim_index))
    df_sp['Date'] = df_sp['Half Hour Starting'].apply(lambda x: x.date())
    df_sp = df_sp[(start_date <= df_sp['Date']) & (df_sp['Date'] < end_date)]
    if len(pd.concat([df_sp[state] for state in states])) != len(df_all):
        raise ValueError(
            f"load data size: {len(pd.concat([df_sp[state] for state in states]))}, expected:{len(df_all)}"
        )

    # sort by region to make sure spot price data and customer data are consistent
    df_all = df_all.sort_values(
        by=['TradingRegion', 'SettlementDateTime']).reset_index(drop=True)
    df_all['Spot Price'] = pd.concat([df_sp[state] for state in states],
                                     ignore_index=True)

    # read simulated customer load data
    load_data = {}
    for state in states:
        state_load_all = read_pickle_from_s3(
            bucket_nbe,
            meter_data_simulation_s3_pickle_path.format(
                run_id, sim_index // 9, 'NBE_{}'.format(state[:-1])))
        load_data[state] = pd.concat([
            state_load_all[key]['GRID_USAGE'] for key in state_load_all
            if start_date <= key < end_date
        ], ignore_index=True)
    print('Customer data from {} SimNo. {} loaded.'.format(run_id, sim_index))
    if len(pd.concat([load_data[state] for state in states])) != len(df_all):
        raise ValueError(
            f"load data size: {len(pd.concat([load_data[state] for state in states]))}, expected:{len(df_all)}"
        )
    df_all['Customer Net MWh'] = pd.concat(
        [load_data[state] for state in states], ignore_index=True)
    df_all['Customer Net MWh'] = df_all['Customer Net MWh'].apply(lambda x: -x)

    # calculate earning at risk and output
    df = calculate_earning_at_risk(df_all)
    df_output = df[[
        'TradingRegion', 'SettlementDateTime', 'Swap Premium',
        'Swap Hedged Qty (MWh)', 'Swap Weighted Strike Price', 'Cap Premium',
        'Cap Hedged Qty (MWh)', 'Cap Weighted Strike Price', 'Spot Price',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]]
    print('Calculation finished. Uploading... {} SimNo. {}'.format(
        run_id, sim_index))
    # to keep a copy of half hour resolution raw data.
    write_pickle_to_s3(
        df_output, bucket_nbe,
        results_EAR_simulation_s3_pickle_path.format(run_id, job_id, sim_index))

    # weekly summary
    print('Summarising the data into weekly resolution... {} SimNo. {}'.format(
        run_id, sim_index))
    df_output['WeekEnding'] = df_output['SettlementDateTime'].apply(
        get_week_ending)  # get the week ending date
    # sum by region by week ending
    df_summarized = df_output[[
        'TradingRegion', 'WeekEnding', 'Swap Hedged Qty (MWh)',
        'Cap Hedged Qty (MWh)', 'Customer Net MWh', 'Pool Cost', 'Swap Cfd',
        'Cap Cfd', 'EAR Cost', 'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['TradingRegion', 'WeekEnding']).sum()
    # all regions' sum by week ending
    df_grandtotal = df_output[[
        'WeekEnding', 'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
        'Customer Net MWh', 'Pool Cost', 'Swap Cfd', 'Cap Cfd', 'EAR Cost',
        'Cap Premium Cost', 'Total Cost ($)'
    ]].groupby(['WeekEnding']).sum()
    df_grandtotal.reset_index(inplace=True)
    df_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
    df_summarized.reset_index(inplace=True)
    df_summarized = df_summarized.append(df_grandtotal).reset_index(drop=True)
    df_summarized['SimNo'] = sim_index
    print('Uploading weekly summary... {} SimNo. {}'.format(run_id, sim_index))
    write_pickle_to_s3(
        df_summarized, bucket_nbe,
        results_EAR_summary_by_simulation_s3_pickle_path.format(
            run_id, job_id, sim_index))
    end_time = time.time()
    print("Processing time {} SimNo. {} : {} seconds.".format(
        run_id, sim_index, end_time - start_time))

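# Example invocation for a single simulation (the identifiers and analysis
# window below are hypothetical):
#
#     load_calculate_summarize('spot_run_001', 'job_001', '20210101',
#                              sim_index=0,
#                              start_year=2021, start_month=1, start_day=1,
#                              end_year=2022, end_month=1, end_day=1)
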
def get_hh_traces(run_id, job_id):
    """
    mapping_info is a list of percentile - sim no information, the values are:
        TradingRegion -> str, e.g. 'GrandTotal', 'VIC1', etc.
        WeekEnding -> datetime.date, e.g. datetime.date(2021, 1, 9)
        Percentile -> float, e.g. 0.05
        Sim No. -> float, e.g. 269(.0)
    :param run_id:
    :param job_id:
    :return:
    """
    mapping_info = read_pickle_from_s3(
        bucket_nbe,
        results_EAR_summary_mapping_s3_pickle_path.format(job_id, run_id))
    df_hh_traces = pd.DataFrame()
    for elem in mapping_info:
        print(elem)
        if (elem[0] == 'GrandTotal') & (elem[1] <= analysis_end_date):
            week_ending = elem[1]
            week_starting = week_ending - timedelta(weeks=1)
            p = elem[2]
            sim_index = int(elem[3])
            df_sim = read_pickle_from_s3(
                bucket_nbe,
                results_EAR_simulation_s3_pickle_path.format(
                    run_id, job_id, sim_index))
            df_sim['Date'] = \
                df_sim['SettlementDateTime'].apply(
                    lambda row: (row.to_pydatetime() - timedelta(minutes=30)).date())
            df_tmp = df_sim[(df_sim['Date'] > week_starting)
                            & (df_sim['Date'] <= week_ending)][[
                                'TradingRegion', 'SettlementDateTime',
                                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                                'Customer Net MWh', 'Spot Price',
                                'Total Cost (excl GST)', 'Total Cost (Incl Cap)'
                            ]]
            df_tmp_grandtotal = df_tmp.groupby(['SettlementDateTime']).sum()
            df_tmp_grandtotal.reset_index(inplace=True)
            df_tmp_grandtotal.insert(0, 'TradingRegion', 'GrandTotal')
            df_tmp.reset_index(inplace=True, drop=True)
            df_tmp = df_tmp.append(df_tmp_grandtotal).reset_index(drop=True)
            df_tmp['Swap Hedged Qty (MW)'] = df_tmp['Swap Hedged Qty (MWh)'] * 2
            df_tmp['Cap Hedged Qty (MW)'] = df_tmp['Cap Hedged Qty (MWh)'] * 2
            df_tmp['Customer Net MW'] = df_tmp['Customer Net MWh'] * 2
            df_tmp = df_tmp.drop(columns=[
                'Swap Hedged Qty (MWh)', 'Cap Hedged Qty (MWh)',
                'Customer Net MWh'
            ])
            df_tmp['Percentile'] = p
            df_hh_traces = df_hh_traces.append(df_tmp)
        else:
            continue
    df_hh_traces['Spot Run No.'] = run_id
    df_hh_traces['Job No.'] = job_id
    df_hh_traces.reset_index(inplace=True)
    df_hh_traces = df_hh_traces.rename(columns={'index': 'Case'})
    # df_hh_traces.csv('HH_Simulation_Traces_{}.csv'.format(run_id))
    # write_pickle_to_s3(df_hh_traces, project_bucket, results_EAR_hh_traces_s3_pickle_path.format(run_id, job_id))
    csv_buffer = StringIO()
    df_hh_traces.to_csv(csv_buffer, index=False)
    s3_resource = boto3.resource('s3')
    s3_resource.Object(
        bucket_nbe,
        results_EAR_hh_traces_s3_pickle_path.format(
            run_id, job_id, job_id, run_id)).put(Body=csv_buffer.getvalue())