def get_generation_mix_process_df(regions=None):
    """
    Create a dataframe of generation mixes by fuel type in each subregion.

    This function imports and uses the parameter
    'gen_mix_from_model_generation_data' from model_config.py. If the value
    is False, it cannot currently handle regions other than 'all', 'NERC',
    'US', or a single eGRID subregion.

    Parameters
    ----------
    regions : str, optional
        Which regions to include (the default is 'all', which includes all
        eGRID subregions)

    Returns
    -------
    DataFrame
        Sample output:
        >>> all_gen_mix_db.head()
            Subregion FuelCategory   Electricity  NERC  Generation_Ratio
        0        AKGD         COAL  5.582922e+05  ASCC          0.116814
        22       AKGD          OIL  3.355753e+05  ASCC          0.070214
        48       AKGD          GAS  3.157474e+06  ASCC          0.660651
        90       AKGD        HYDRO  5.477350e+05  ASCC          0.114605
        114      AKGD      BIOMASS  5.616577e+04  ASCC          0.011752
    """
    from electricitylci.egrid_filter import (
        electricity_for_selected_egrid_facilities,
    )
    from electricitylci.generation_mix import (
        create_generation_mix_process_df_from_model_generation_data,
        create_generation_mix_process_df_from_egrid_ref_data,
    )
    from electricitylci.model_config import gen_mix_from_model_generation_data
    from electricitylci.model_config import replace_egrid
    from electricitylci.eia923_generation import build_generation_data
    from electricitylci.model_config import eia_gen_year

    if regions is None:
        regions = model_specs['regional_aggregation']
    if replace_egrid:
        # assert regions == 'BA' or regions == 'NERC', 'Regions must be BA or NERC'
        print("Actual generation data is used when replacing eGRID")
        generation_data = build_generation_data(
            generation_years=[eia_gen_year])
        generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
            generation_data, regions)
    else:
        if gen_mix_from_model_generation_data:
            generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
                electricity_for_selected_egrid_facilities, regions)
        else:
            generation_mix_process_df = create_generation_mix_process_df_from_egrid_ref_data(
                regions)
    return generation_mix_process_df
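# Minimal illustrative sketch (not part of the model): the Generation_Ratio
# column in the sample output above is each fuel's share of its subregion's
# total generation. Toy numbers; only pandas is assumed.
def _example_generation_ratio():
    import pandas as pd

    gen = pd.DataFrame({
        "Subregion": ["AKGD", "AKGD", "AKGD"],
        "FuelCategory": ["COAL", "GAS", "HYDRO"],
        "Electricity": [5.58e5, 3.16e6, 5.48e5],
    })
    # Divide each row by its subregion's total generation.
    gen["Generation_Ratio"] = (
        gen["Electricity"]
        / gen.groupby("Subregion")["Electricity"].transform("sum")
    )
    return gen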
def create_generation_process_df():
    """
    Read emissions and generation data from different sources to provide
    facility-level emissions. The most important inputs to this process come
    from the model configuration file.

    Parameters
    ----------
    None

    Returns
    -------
    DataFrame
        DataFrame includes all facility-level emissions.
    """
    from electricitylci.eia923_generation import (build_generation_data,
                                                  eia923_primary_fuel)
    from electricitylci.egrid_filter import (
        egrid_facilities_to_include,
        emissions_and_waste_for_selected_egrid_facilities,
    )
    from electricitylci.generation import (
        egrid_facilities_w_fuel_region,
        add_technological_correlation_score,
        add_temporal_correlation_score,
    )
    import electricitylci.emissions_other_sources as em_other
    import electricitylci.ampd_plant_emissions as ampd
    from electricitylci.combinator import ba_codes
    import electricitylci.manual_edits as edits

    COMPARTMENT_DICT = {
        "emission/air": "air",
        "emission/water": "water",
        "emission/ground": "ground",
        "input": "input",
        "output": "output",
        "waste": "waste",
        "air": "air",
        "water": "water",
        "ground": "ground",
    }
    if model_specs.replace_egrid:
        generation_data = build_generation_data().drop_duplicates()
        cems_df = ampd.generate_plant_emissions(model_specs.eia_gen_year)
        cems_df.drop(columns=["FlowUUID"], inplace=True)
        emissions_and_waste_for_selected_egrid_facilities = em_other.integrate_replace_emissions(
            cems_df, emissions_and_waste_for_selected_egrid_facilities)
    else:
        from electricitylci.egrid_filter import electricity_for_selected_egrid_facilities
        generation_data = electricity_for_selected_egrid_facilities
        generation_data["Year"] = model_specs.egrid_year
        generation_data["FacilityID"] = generation_data["FacilityID"].astype(
            int)
        # generation_data = build_generation_data(
        #     egrid_facilities_to_include=egrid_facilities_to_include
        # )
    # NOTE: this drop is not in place, so FacilityID is retained here; the
    # suffixed copies created by the merges below are dropped later.
    emissions_and_waste_for_selected_egrid_facilities.drop(
        columns=["FacilityID"])
    emissions_and_waste_for_selected_egrid_facilities[
        "eGRID_ID"] = emissions_and_waste_for_selected_egrid_facilities[
            "eGRID_ID"].astype(int)
    final_database = pd.merge(
        left=emissions_and_waste_for_selected_egrid_facilities,
        right=generation_data,
        right_on=["FacilityID", "Year"],
        left_on=["eGRID_ID", "Year"],
        how="left",
    )
    egrid_facilities_w_fuel_region[
        "FacilityID"] = egrid_facilities_w_fuel_region["FacilityID"].astype(
            int)
    final_database = pd.merge(
        left=final_database,
        right=egrid_facilities_w_fuel_region,
        left_on="eGRID_ID",
        right_on="FacilityID",
        how="left",
        suffixes=["", "_right"],
    )
    if model_specs.replace_egrid:
        primary_fuel_df = eia923_primary_fuel(year=model_specs.eia_gen_year)
        primary_fuel_df.rename(columns={'Plant Id': "eGRID_ID"}, inplace=True)
        primary_fuel_df["eGRID_ID"] = primary_fuel_df["eGRID_ID"].astype(int)
        key_df = (primary_fuel_df[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        final_database["FuelCategory"] = final_database["eGRID_ID"].map(
            key_df["FuelCategory"])
    else:
        key_df = (final_database[[
            "eGRID_ID", "FuelCategory"
        ]].dropna().drop_duplicates(subset="eGRID_ID").set_index("eGRID_ID"))
        final_database.loc[final_database["FuelCategory"].isnull(),
                           "FuelCategory"] = final_database.loc[
                               final_database["FuelCategory"].isnull(),
                               "eGRID_ID"].map(key_df["FuelCategory"])
    # if replace_egrid:
    #     final_database["FuelCategory"].fillna(
    #         final_database["FuelCategory_right"], inplace=True
    #     )
    final_database["Final_fuel_agg"] = final_database["FuelCategory"]
    # if model_specs.use_primaryfuel_for_coal:
    #     final_database.loc[
    #         final_database["FuelCategory"] == "COAL", ["Final_fuel_agg"]
    #     ] = final_database.loc[
    #         final_database["FuelCategory"] == "COAL", "PrimaryFuel"
    #     ]
    try:
        year_filter = final_database["Year_x"] == final_database["Year_y"]
        final_database = final_database.loc[year_filter, :]
        final_database.drop(columns="Year_y", inplace=True)
    except KeyError:
        pass
    final_database.rename(columns={"Year_x": "Year"}, inplace=True)
    final_database = map_emissions_to_fedelemflows(final_database)
    dup_cols_check = [
        "FacilityID",
        "FuelCategory",
        "FlowName",
        "FlowAmount",
        "Compartment",
    ]
    final_database = final_database.loc[:, ~final_database.columns.duplicated()]
    final_database = final_database.drop_duplicates(subset=dup_cols_check)
    final_database.drop(
        columns=["FuelCategory", "FacilityID_x", "FacilityID_y"],
        inplace=True)
    final_database.rename(
        columns={
            "Final_fuel_agg": "FuelCategory",
            "TargetFlowUUID": "FlowUUID",
        },
        inplace=True,
    )
    final_database = add_temporal_correlation_score(
        final_database, model_specs.electricity_lci_target_year)
    final_database = add_technological_correlation_score(final_database)
    final_database["DataCollection"] = 5
    final_database["GeographicalCorrelation"] = 1
    final_database["eGRID_ID"] = final_database["eGRID_ID"].astype(int)
    final_database.sort_values(by=["eGRID_ID", "Compartment", "FlowName"],
                               inplace=True)
    final_database["stage_code"] = "Power plant"
    final_database["Compartment_path"] = final_database["Compartment"]
    final_database["Compartment"] = final_database["Compartment_path"].map(
        COMPARTMENT_DICT)
    final_database["Balancing Authority Name"] = final_database[
        "Balancing Authority Code"].map(ba_codes["BA_Name"])
    final_database["EIA_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["EIA_Region"])
    final_database["FERC_Region"] = final_database[
        "Balancing Authority Code"].map(ba_codes["FERC_Region"])
    final_database = edits.check_for_edits(final_database, "generation.py",
                                           "create_generation_process_df")
    return final_database
def get_generation_mix_process_df(regions=None):
    """
    Create a dataframe of generation mixes by fuel type in each subregion.

    This function imports and uses the parameters 'replace_egrid' and
    'gen_mix_from_model_generation_data' from model_config.py. If
    'replace_egrid' is True, or if the specified 'regions' is 'BA', 'FERC',
    or 'US', then the generation mix will come from EIA 923 data. If
    'replace_egrid' is False, then the generation mix will either come from
    the eGRID reference data ('gen_mix_from_model_generation_data' is False)
    or from the generation data from this model
    ('gen_mix_from_model_generation_data' is True).

    Parameters
    ----------
    regions : str, optional
        Which regions to include (the default is 'all', which includes all
        eGRID subregions)

    Returns
    -------
    DataFrame
        Sample output:
        >>> all_gen_mix_db.head()
            Subregion FuelCategory   Electricity  NERC  Generation_Ratio
        0        AKGD         COAL  5.582922e+05  ASCC          0.116814
        22       AKGD          OIL  3.355753e+05  ASCC          0.070214
        48       AKGD          GAS  3.157474e+06  ASCC          0.660651
        90       AKGD        HYDRO  5.477350e+05  ASCC          0.114605
        114      AKGD      BIOMASS  5.616577e+04  ASCC          0.011752
    """
    from electricitylci.egrid_filter import (
        electricity_for_selected_egrid_facilities,
    )
    from electricitylci.generation_mix import (
        create_generation_mix_process_df_from_model_generation_data,
        create_generation_mix_process_df_from_egrid_ref_data,
    )
    from electricitylci.eia923_generation import build_generation_data

    if regions is None:
        regions = config.model_specs.regional_aggregation

    if config.model_specs.replace_egrid or regions in ["BA", "FERC", "US"]:
        # assert regions == 'BA' or regions == 'NERC', 'Regions must be BA or NERC'
        if (regions in ["BA", "FERC", "US"]
                and not config.model_specs.replace_egrid):
            logger.info(
                "EIA923 generation data is being used for the generation mix "
                "despite replace_egrid = False. The reference eGRID "
                "electricity data cannot be reorganized to match BA or FERC "
                "regions, and the eGRID-based function does not support "
                "aggregating to the US region.")
        print("EIA923 generation data is used when replacing eGRID")
        generation_data = build_generation_data(
            generation_years=[config.model_specs.eia_gen_year])
        generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
            generation_data, regions)
    else:
        if config.model_specs.gen_mix_from_model_generation_data:
            generation_mix_process_df = create_generation_mix_process_df_from_model_generation_data(
                electricity_for_selected_egrid_facilities, regions)
        else:
            generation_mix_process_df = create_generation_mix_process_df_from_egrid_ref_data(
                regions)
    return generation_mix_process_df
def get_generation_process_df(use_alt_gen_process=None, regions=None, **kwargs):
    """
    Create a dataframe of emissions from power generation by fuel type in
    each region. kwargs would include the upstream emissions dataframe
    (upstream_df) if upstream emissions are being included.

    Parameters
    ----------
    use_alt_gen_process : bool, optional
        If the NETL alternate generation process method should be used (the
        default is None, in which case the value is read from a settings
        YAML file).
    regions : str, optional
        Regions to include in the analysis (the default is None, which uses
        the value read from a settings YAML file). Other options include
        "eGRID", "NERC", "BA", "US", "FERC", and "EIA".

    Returns
    -------
    DataFrame
        Each row represents information about a single emission from a fuel
        category in a single region. Columns are: 'Subregion',
        'FuelCategory', 'FlowName', 'FlowUUID', 'Compartment', 'Year',
        'Source', 'Unit', 'ElementaryFlowPrimeContext',
        'TechnologicalCorrelation', 'TemporalCorrelation', 'DataCollection',
        'Emission_factor', 'Reliability_Score', 'GeographicalCorrelation',
        'GeomMean', 'GeomSD', 'Maximum', 'Minimum'
    """
    if use_alt_gen_process is None:
        use_alt_gen_process = model_specs['use_alt_gen_process']
    if regions is None:
        regions = model_specs['regional_aggregation']
    if use_alt_gen_process is True:
        try:
            upstream_df = kwargs['upstream_df']
        except KeyError:
            print(
                "A kwarg named 'upstream_df' must be included if "
                "use_alt_gen_process is True")
            # upstream_df = get_upstream_process_df()
        if model_specs['include_upstream_processes'] is True:
            upstream_dict = write_upstream_process_database_to_dict(
                upstream_df)
            upstream_dict = write_upstream_dicts_to_jsonld(upstream_dict)
            gen_df = get_alternate_gen_plus_netl()
            combined_df, canadian_gen = combine_upstream_and_gen_df(
                gen_df, upstream_df)
            gen_plus_fuels = add_fuels_to_gen(gen_df, upstream_df,
                                              canadian_gen, upstream_dict)
        else:
            gen_df = get_alternate_gen_plus_netl()
            upstream_df = pd.DataFrame(columns=gen_df.columns)
            upstream_dict = {}
            gen_plus_fuels = gen_df
        # This change has been made to accommodate the new method of
        # generating consumption mixes for FERC regions. They now pull BAs
        # to provide a more accurate inventory. The tradeoff here is that
        # it's no longer possible to make a FERC region generation mix and
        # also provide the consumption mix. Or it could be possible but
        # would require running through aggregate twice.
        # generation_process_df = aggregate_gen(
        #     gen_plus_fuels, subregion=regions
        # )
        generation_process_df = aggregate_gen(gen_plus_fuels, subregion="BA")
        return generation_process_df
    else:
        from electricitylci.egrid_filter import (
            electricity_for_selected_egrid_facilities,
            egrid_facilities_to_include,
            emissions_and_waste_for_selected_egrid_facilities,
        )
        from electricitylci.eia923_generation import build_generation_data
        from electricitylci.generation import create_generation_process_df
        from electricitylci.model_config import replace_egrid

        if replace_egrid is True:
            # This is a dummy function that doesn't exist yet
            # updated_emissions = build_new_emissions(year)
            generation_data = build_generation_data()
            generation_process_df = create_generation_process_df(
                generation_data,
                emissions_and_waste_for_selected_egrid_facilities,
                subregion=regions,
            )
        else:
            electricity_for_selected_egrid_facilities["Year"] = model_specs[
                "egrid_year"]
            generation_process_df = create_generation_process_df(
                electricity_for_selected_egrid_facilities,
                emissions_and_waste_for_selected_egrid_facilities,
                subregion=regions,
            )
        return generation_process_df
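# Hedged usage sketch for the alternate generation path above. The
# `get_upstream_process_df` helper is only referenced in a comment in this
# function, so it is shown here as a hypothetical entry point:
#
#     upstream_df = get_upstream_process_df()  # hypothetical helper
#     gen_df = get_generation_process_df(
#         use_alt_gen_process=True, regions="BA", upstream_df=upstream_df
#     )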
def generate_regional_grid_loss(final_database, year, subregion="all"):
    """Generate transmission and distribution losses for the provided
    generation data and given year, aggregated by subregion.

    Arguments:
        final_database: dataframe
            The database containing plant-level emissions.
        year: int
            Analysis year for the transmission and distribution loss data.
            Ideally this should match the year of your final_database.
        subregion: str
            Level of subregion aggregation (the default is 'all').

    Returns:
        td_by_region: dataframe
            A dataframe of transmission and distribution loss rates as a
            fraction. This dataframe can be used to generate unit processes
            for transmission and distribution to match the regionally-
            aggregated emissions unit processes.
    """
    print("Generating factors for transmission and distribution losses")
    from electricitylci.eia923_generation import build_generation_data
    from electricitylci.combinator import ba_codes
    from electricitylci.egrid_facilities import egrid_facilities

    td_calc_columns = [
        "State",
        "NERC",
        "FuelCategory",
        "PrimaryFuel",
        "Balancing Authority Name",
        "Electricity",
        "Year",
        "Subregion",
        "FRS_ID",
        "eGRID_ID",
    ]
    # plant_generation = final_database[td_calc_columns].drop_duplicates()
    egrid_facilities_w_fuel_region = egrid_facilities[[
        "FacilityID", "Subregion", "PrimaryFuel", "FuelCategory", "NERC",
        "PercentGenerationfromDesignatedFuelCategory",
        "Balancing Authority Name", "Balancing Authority Code", "State"
    ]].copy()
    egrid_facilities_w_fuel_region[
        "FacilityID"] = egrid_facilities_w_fuel_region["FacilityID"].astype(
            int)
    plant_generation = build_generation_data(generation_years=[year])
    plant_generation["FacilityID"] = plant_generation["FacilityID"].astype(int)
    plant_generation = plant_generation.merge(egrid_facilities_w_fuel_region,
                                              on=["FacilityID"],
                                              how="left")
    plant_generation["Balancing Authority Name"] = plant_generation[
        "Balancing Authority Code"].map(ba_codes["BA_Name"])
    plant_generation["FERC_Region"] = plant_generation[
        "Balancing Authority Code"].map(ba_codes["FERC_Region"])
    plant_generation["EIA_Region"] = plant_generation[
        "Balancing Authority Code"].map(ba_codes["EIA_Region"])
    td_rates = eia_trans_dist_download_extract(f"{year}")
    td_by_plant = pd.merge(
        left=plant_generation,
        right=td_rates,
        left_on="State",
        right_index=True,
        how="left",
    )
    td_by_plant.dropna(subset=["t_d_losses"], inplace=True)
    td_by_plant["t_d_losses"] = td_by_plant["t_d_losses"].astype(float)

    from electricitylci.aggregation_selector import subregion_col
    aggregation_column = subregion_col(subregion)
    # Generation-weighted mean of the loss rates within each region.
    wm = lambda x: np.average(x, weights=td_by_plant.loc[x.index, "Electricity"])
    if aggregation_column is not None:
        td_by_region = td_by_plant.groupby(
            aggregation_column, as_index=False).agg({"t_d_losses": wm})
    else:
        td_by_region = pd.DataFrame(td_by_plant.agg({"t_d_losses": wm}),
                                    columns=["t_d_losses"])
        td_by_region["Region"] = "US"
    return td_by_region
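# Illustrative sketch (toy data only) of the generation-weighted average used
# above: np.average weighted by each plant's Electricity, applied per region
# through groupby/agg.
def _example_weighted_td_losses():
    import numpy as np
    import pandas as pd

    demo = pd.DataFrame({
        "Subregion": ["A", "A", "B"],
        "t_d_losses": [0.05, 0.07, 0.04],
        "Electricity": [100.0, 300.0, 50.0],
    })
    wm = lambda x: np.average(x, weights=demo.loc[x.index, "Electricity"])
    # Region A: (0.05*100 + 0.07*300) / 400 = 0.065; region B: 0.04
    return demo.groupby("Subregion", as_index=False).agg({"t_d_losses": wm})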
def ba_io_trading_model(year=None, subregion=None, regions_to_keep=None):
    REGION_NAMES = [
        'California', 'Carolinas', 'Central',
        'Electric Reliability Council of Texas, Inc.', 'Florida',
        'Mid-Atlantic', 'Midwest', 'New England ISO',
        'New York Independent System Operator', 'Northwest', 'Southeast',
        'Southwest', 'Tennessee Valley Authority'
    ]
    REGION_ACRONYMS = [
        'TVA', 'MIDA', 'CAL', 'CAR', 'CENT', 'ERCO', 'FLA',
        'MIDW', 'ISNE', 'NYIS', 'NW', 'SE', 'SW',
    ]
    if year is None:
        year = model_specs.NETL_IO_trading_year
    if subregion is None:
        subregion = model_specs.regional_aggregation
    if subregion not in ['BA', 'FERC', 'US']:
        raise ValueError(
            f'subregion or regional_aggregation must have a value of "BA", '
            f'"FERC", or "US" when calculating trading with input-output, '
            f'not {subregion}'
        )

    # Read in the BA file, which contains the names and abbreviations
    df_BA = pd.read_excel(data_dir + '/BA_Codes_930.xlsx',
                          sheet_name='US', header=4)
    df_BA.rename(columns={'etag ID': 'BA_Acronym',
                          'Entity Name': 'BA_Name',
                          'NCR_ID#': 'NRC_ID'}, inplace=True)
    BA = np.array(df_BA['BA_Acronym'])
    US_BA_acronyms = df_BA['BA_Acronym'].tolist()

    # The original df_BA does not include the Canadian balancing
    # authorities. Import them here, then concatenate to make a single
    # df_BA_NA (North America).
    df_BA_CA = pd.read_excel(data_dir + '/BA_Codes_930.xlsx',
                             sheet_name='Canada', header=4)
    df_BA_CA.rename(columns={'etag ID': 'BA_Acronym',
                             'Entity Name': 'BA_Name',
                             'NCR_ID#': 'NRC_ID'}, inplace=True)
    df_BA_NA = pd.concat([df_BA, df_BA_CA])
    ferc_list = df_BA_NA['FERC_Region_Abbr'].unique().tolist()

    # Read in the bulk data
    # download_EBA()
    path = join(data_dir, 'bulk_data', 'EBA.zip')
    NET_GEN_ROWS = []
    BA_TO_BA_ROWS = []
    DEMAND_ROWS = []
    TOTAL_INTERCHANGE_ROWS = []
    try:
        logging.info("Using existing bulk data download")
        z = zipfile.ZipFile(path, 'r')
    except FileNotFoundError:
        logging.info("Downloading new bulk data")
        download_EBA()
        z = zipfile.ZipFile(path, 'r')

    logging.info("Loading bulk data to json")
    with z.open('EBA.txt') as f:
        for line in f:
            # All but one BA is currently reporting net generation in UTC
            # and local time; for that one BA (GRMA) only UTC time is
            # reported, so only pulling UTC for now.
            if b'EBA.NG.H' in line and b'EBA.NG.HL' not in line:
                NET_GEN_ROWS.append(json.loads(line))
            # Similarly, there are 5 interchanges that report interchange
            # in UTC but not in local time.
            elif b'EBA.ID.H' in line and b'EBA.ID.HL' not in line:
                exchange_line = json.loads(line)
                if exchange_line['series_id'].split('-')[0][4:] not in REGION_ACRONYMS:
                    # try:
                    #     # Adding this check here to hopefully save some
                    #     # time down the road.
                    #     dummy_date = datetime.strptime(
                    #         exchange_line['data'][0][0], '%Y%m%dT%HZ')
                    BA_TO_BA_ROWS.append(exchange_line)
                    #     good_date_count += 1
                    # except ValueError:
                    #     bad_date_count += 1
                    #     continue
            # Keeping these here just in case
            elif b'EBA.D.H' in line and b'EBA.D.HL' not in line:
                DEMAND_ROWS.append(json.loads(line))
            # elif b'EBA.TI.H' in line:
            #     TOTAL_INTERCHANGE_ROWS.append(json.loads(line))
    logging.info(f"Net gen rows: {len(NET_GEN_ROWS)}; "
                 f"BA to BA rows: {len(BA_TO_BA_ROWS)}; "
                 f"Demand rows: {len(DEMAND_ROWS)}")

    eia923_gen = eia923.build_generation_data(generation_years=[year])
    eia860_df = eia860.eia860_balancing_authority(year)
    eia860_df["Plant Id"] = eia860_df["Plant Id"].astype(int)
    eia_combined_df = eia923_gen.merge(eia860_df,
                                       left_on=["FacilityID"],
                                       right_on=["Plant Id"],
                                       how="left")
    eia_gen_ba = eia_combined_df.groupby(
        by=["Balancing Authority Code"], as_index=False)["Electricity"].sum()

    # Subset for the specified eia_gen_year
    start_datetime = '{}-01-01 00:00:00+00:00'.format(year)
    end_datetime = '{}-12-31 23:00:00+00:00'.format(year)
    start_datetime = datetime.strptime(start_datetime, '%Y-%m-%d %H:%M:%S%z')
    end_datetime = datetime.strptime(end_datetime, '%Y-%m-%d %H:%M:%S%z')

    # Net generation data import
    logging.info("Generating df with datetime")
    df_net_gen = row_to_df(NET_GEN_ROWS, 'net_gen')
    del NET_GEN_ROWS
    logging.info("Pivoting")
    df_net_gen = df_net_gen.pivot(index='datetime', columns='region',
                                  values='net_gen')
    ba_cols = US_BA_acronyms
    gen_cols = list(df_net_gen.columns.values)
    gen_cols_set = set(gen_cols)
    ba_ref_set = set(ba_cols)
    col_diff = list(ba_ref_set - gen_cols_set)
    col_diff.sort(key=str.upper)
    logging.info("Cleaning net_gen dataframe")
    # Add in missing columns, then sort in alphabetical order
    for i in col_diff:
        df_net_gen[i] = 0
    # Keep only the columns that match the balancing authority names; there
    # are several other columns included in the dataset that represent
    # states (e.g., TEX, NY, FL) and other areas (US48).
    df_net_gen = df_net_gen[ba_cols]
    # Re-sort columns so the headers are in alpha order
    df_net_gen = df_net_gen.sort_index(axis=1)
    df_net_gen = df_net_gen.fillna(value=0)
    df_net_gen = df_net_gen.loc[start_datetime:end_datetime]
    # Sum values in each column
    df_net_gen_sum = df_net_gen.sum(axis=0).to_frame()

    logging.info("Reading Canadian import data")
    # Add Canadian import data to the net generation dataset, concatenate
    # and put in alpha order
    df_CA_Imports_Gen = pd.read_csv(data_dir + '/CA_Imports_Gen.csv',
                                    index_col=0)
    df_CA_Imports_Gen = df_CA_Imports_Gen[str(year)]
    logging.info("Combining US and Canadian net gen data")
    df_net_gen_sum = pd.concat([df_net_gen_sum, df_CA_Imports_Gen]).sum(axis=1)
    df_net_gen_sum = df_net_gen_sum.to_frame()
    df_net_gen_sum = df_net_gen_sum.sort_index(axis=0)
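    # Illustrative sketch of the series filtering above (kept as a comment
    # so it has no runtime effect). The byte strings are fabricated
    # stand-ins for EBA.txt lines; the substring test keeps UTC series
    # ('...NG.H') and skips their local-time twins ('...NG.HL'):
    #
    #     lines = [
    #         b'{"series_id": "EBA.NG.H", "data": []}',   # UTC: kept
    #         b'{"series_id": "EBA.NG.HL", "data": []}',  # local: skipped
    #     ]
    #     kept = [json.loads(ln) for ln in lines
    #             if b'EBA.NG.H' in ln and b'EBA.NG.HL' not in ln]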
logging.info("Checking against EIA 923 generation data") net_gen_check=df_net_gen_sum.merge( right=eia_gen_ba, left_index=True, right_on=["Balancing Authority Code"], how="left" ).reset_index() net_gen_check["diff"]=abs(net_gen_check["Electricity"]-net_gen_check[0])/net_gen_check[0] diff_mad=net_gen_check["diff"].mad() net_gen_swap=net_gen_check.loc[net_gen_check["diff"]>diff_mad,["Balancing Authority Code","Electricity"]].set_index("Balancing Authority Code") df_net_gen_sum.loc[net_gen_swap.index,[0]]=np.nan net_gen_swap.rename(columns={"Electricity":0},inplace=True) df_net_gen_sum=df_net_gen_sum.combine_first(net_gen_swap) # First work on the trading data from the 'df_trade_all_stack_2016' frame # This cell does the following: # 1. reformats the data to an annual basis # 2. formats the BA names in the corresponding columns # 3. evalutes the trade values from both BA perspectives # (e.g. BA1 as exporter and importer in a transaction with BA2) # 4. evaluates the trading data for any results that don't make sense # a. both BAs designate as importers (negative value) # b. both BAs designate as exporters (postive value) # c. one of the BAs in the transation reports a zero value and the other is nonzero # 5. calulate the percent difference in the transaction values reports by BAs # 6. final exchange value based on logic; # a. if percent diff is less than 20%, take mean, # b. if not use the value as reported by the exporting BAA # c. designate each BA in the transaction either as the importer or exporter # Output is a pivot with index (rows) representing exporting BAs, # columns representing importing BAs, and values for the traded amount # Group and resample trading data so that it is on an annual basis logging.info("Creating trading dataframe") df_ba_trade = ba_exchange_to_df(BA_TO_BA_ROWS, data_type='ba_to_ba') del(BA_TO_BA_ROWS) df_ba_trade = df_ba_trade.set_index('datetime') df_ba_trade['transacting regions'] = df_ba_trade['from_region'] + '-' + df_ba_trade['to_region'] logging.info("Filtering trading dataframe") # Keep only the columns that match the balancing authority names, there are several other columns included in the dataset # that represent states (e.g., TEX, NY, FL) and other areas (US48) filt1 = df_ba_trade['from_region'].isin(ba_cols) filt2 = df_ba_trade['to_region'].isin(ba_cols) filt = filt1 & filt2 df_ba_trade = df_ba_trade[filt] # Subset for eia_gen_year, need to pivot first because of non-unique datetime index df_ba_trade_pivot = df_ba_trade.pivot(columns = 'transacting regions', values = 'ba_to_ba') df_ba_trade_pivot = df_ba_trade_pivot.loc[start_datetime:end_datetime] # Sum columns - represents the net transactced amount between the two BAs df_ba_trade_sum = df_ba_trade_pivot.sum(axis = 0).to_frame() df_ba_trade_sum = df_ba_trade_sum.reset_index() df_ba_trade_sum.columns = ['BAAs','Exchange'] # Split BAA string into exporting and importing BAA columns df_ba_trade_sum['BAA1'], df_ba_trade_sum['BAA2'] = df_ba_trade_sum['BAAs'].str.split('-', 1).str df_ba_trade_sum = df_ba_trade_sum.rename(columns={'BAAs': 'Transacting BAAs'}) # Create two perspectives - import and export to use for comparison in selection of the final exchange value between the BAAs df_trade_sum_1_2 = df_ba_trade_sum.groupby(['BAA1', 'BAA2','Transacting BAAs'], as_index=False)[['Exchange']].sum() df_trade_sum_2_1 = df_ba_trade_sum.groupby(['BAA2', 'BAA1', 'Transacting BAAs'], as_index=False)[['Exchange']].sum() df_trade_sum_1_2.columns = ['BAA1_1_2', 'BAA2_1_2','Transacting BAAs_1_2', 'Exchange_1_2'] 
    df_trade_sum_2_1.columns = [
        'BAA2_2_1', 'BAA1_2_1', 'Transacting BAAs_2_1', 'Exchange_2_1']

    # Combine the two grouped tables for comparison of the exchange values
    df_concat_trade = pd.concat([df_trade_sum_1_2, df_trade_sum_2_1], axis=1)
    df_concat_trade['Exchange_1_2_abs'] = df_concat_trade['Exchange_1_2'].abs()
    df_concat_trade['Exchange_2_1_abs'] = df_concat_trade['Exchange_2_1'].abs()

    # Create a new column to check if the BAAs designate as either both
    # exporters or both importers, or if one of the entities in the
    # transaction reports a zero value. Drop combinations where any of
    # these conditions are true; keep everything else.
    df_concat_trade['Status_Check'] = np.where(
        ((df_concat_trade['Exchange_1_2'] > 0)
         & (df_concat_trade['Exchange_2_1'] > 0))
        | ((df_concat_trade['Exchange_1_2'] < 0)
           & (df_concat_trade['Exchange_2_1'] < 0))
        | ((df_concat_trade['Exchange_1_2'] == 0)
           | (df_concat_trade['Exchange_2_1'] == 0)),
        'drop', 'keep')

    # Calculate the difference in exchange values
    df_concat_trade['Delta'] = (df_concat_trade['Exchange_1_2_abs']
                                - df_concat_trade['Exchange_2_1_abs'])

    # Calculate the percent difference of the exchange_abs values. This can
    # be done two ways: relative to the 1_2 exchange or relative to the 2_1
    # exchange. Perform the calculation both ways and take the average.
    df_concat_trade['Percent_Diff_Avg'] = (
        (abs((df_concat_trade['Exchange_1_2_abs']
              / df_concat_trade['Exchange_2_1_abs']) - 1))
        + (abs((df_concat_trade['Exchange_2_1_abs']
                / df_concat_trade['Exchange_1_2_abs']) - 1))) / 2

    # Mean exchange value
    df_concat_trade['Exchange_mean'] = df_concat_trade[
        ['Exchange_1_2_abs', 'Exchange_2_1_abs']].mean(axis=1)

    # The percent-diff equation creates NaN where both values are 0;
    # fill with 0
    df_concat_trade['Percent_Diff_Avg'].fillna(0, inplace=True)

    # Final exchange value based on logic: if the percent diff is less than
    # 20%, take the mean; if not, use the value as reported by the
    # exporting BAA. First figure out which BAA is the exporter by checking
    # the value of Exchange_1_2. If that value is positive, it indicates
    # that BAA1 is exporting to BAA2; if negative, use the value from
    # Exchange_2_1.
    df_concat_trade['Final_Exchange'] = np.where(
        (df_concat_trade['Percent_Diff_Avg'].abs() < 0.2),
        df_concat_trade['Exchange_mean'],
        np.where((df_concat_trade['Exchange_1_2'] > 0),
                 df_concat_trade['Exchange_1_2'],
                 df_concat_trade['Exchange_2_1']))

    # Assign the final designation of each BAA as exporter or importer
    # based on the logical assignment
    df_concat_trade['Export_BAA'] = np.where(
        (df_concat_trade['Exchange_1_2'] > 0),
        df_concat_trade['BAA1_1_2'],
        np.where((df_concat_trade['Exchange_1_2'] < 0),
                 df_concat_trade['BAA2_1_2'], ''))
    df_concat_trade['Import_BAA'] = np.where(
        (df_concat_trade['Exchange_1_2'] < 0),
        df_concat_trade['BAA1_1_2'],
        np.where((df_concat_trade['Exchange_1_2'] > 0),
                 df_concat_trade['BAA2_1_2'], ''))
    df_concat_trade = df_concat_trade[df_concat_trade['Status_Check'] == 'keep']

    # Create the final trading matrix: first grab the necessary columns,
    # rename the columns, and then pivot
    df_concat_trade_subset = df_concat_trade[
        ['Export_BAA', 'Import_BAA', 'Final_Exchange']]
    df_concat_trade_subset.columns = [
        'Exporting_BAA', 'Importing_BAA', 'Amount']
    df_trade_pivot = df_concat_trade_subset.pivot_table(
        index='Exporting_BAA', columns='Importing_BAA',
        values='Amount').fillna(0)

    # This block continues formatting the trade matrix.
    # Find missing BAs - they need to be added in so that we have a square
    # matrix (not all BAs are involved in transactions).
    trade_cols = list(df_trade_pivot.columns.values)
    trade_rows = list(df_trade_pivot.index.values)
    trade_cols_set = set(trade_cols)
    trade_rows_set = set(trade_rows)
    trade_ba_ref_set = set(ba_cols)
    trade_col_diff = list(trade_ba_ref_set - trade_cols_set)
    trade_col_diff.sort(key=str.upper)
    trade_row_diff = list(trade_ba_ref_set - trade_rows_set)
    trade_row_diff.sort(key=str.upper)
    # Add in missing columns, then sort in alphabetical order
    for i in trade_col_diff:
        df_trade_pivot[i] = 0
    df_trade_pivot = df_trade_pivot.sort_index(axis=1)
    # Add in missing rows, then sort in alphabetical order
    for i in trade_row_diff:
        df_trade_pivot.loc[i, :] = 0
    df_trade_pivot = df_trade_pivot.sort_index(axis=0)

    # Add Canadian imports to the trading matrix;
    # CA imports are specified in an external file
    df_CA_Imports_Cols = pd.read_csv(data_dir + '/CA_Imports_Cols.csv',
                                     index_col=0)
    df_CA_Imports_Rows = pd.read_csv(data_dir + '/CA_Imports_Rows.csv',
                                     index_col=0)
    df_CA_Imports_Rows = df_CA_Imports_Rows[['us_ba', str(year)]]
    df_CA_Imports_Rows = df_CA_Imports_Rows.pivot(columns='us_ba',
                                                  values=str(year))
    df_concat_trade_CA = pd.concat([df_trade_pivot, df_CA_Imports_Rows])
    df_concat_trade_CA = pd.concat([df_concat_trade_CA, df_CA_Imports_Cols],
                                   axis=1)
    df_concat_trade_CA.fillna(0, inplace=True)
    df_trade_pivot = df_concat_trade_CA
    df_trade_pivot = df_trade_pivot.sort_index(axis=0)
    df_trade_pivot = df_trade_pivot.sort_index(axis=1)
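    # Toy numbers (comment only) for the reconciliation rule applied above:
    #
    #     Exchange_1_2 = 100.0   # BAA1's report: it exports 100 to BAA2
    #     Exchange_2_1 = -95.0   # BAA2's report: it imports 95 from BAA1
    #     pct = (abs(100/95 - 1) + abs(95/100 - 1)) / 2   # ~0.051 < 0.2
    #     final = (100 + 95) / 2                          # mean -> 97.5
    #
    # Had BAA2 reported -50, pct would be ~0.75 and the exporter's value
    # (100, since Exchange_1_2 > 0) would be kept instead.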
    # Perform the trading calculations as provided in Qu et al. (2018) to
    # determine the composition of a BA's consumption mix

    # Create the total inflow vector x and then convert to a diagonal
    # matrix x-hat
    logging.info("Inflow vector")
    x = []
    for i in range(len(df_net_gen_sum)):
        x.append(df_net_gen_sum.iloc[i] + df_trade_pivot.sum(axis=0).iloc[i])
    x_np = np.array(x)

    # If values are zero, the x_hat matrix will be singular; set BAAs with
    # 0 to a small value (1)
    df_x = pd.DataFrame(data=x_np, index=df_trade_pivot.index)
    df_x = df_x.rename(columns={0: 'inflow'})
    df_x.loc[df_x['inflow'] == 0] = 1
    x_np = df_x.values
    x_hat = np.diagflat(x_np)

    # Create the consumption vector c and then convert to a diagonal matrix
    # c-hat; calculate c based on x and T
    logging.info("consumption vector")
    c = []
    for i in range(len(df_net_gen_sum)):
        c.append(x[i] - df_trade_pivot.sum(axis=1).iloc[i])
    c_np = np.array(c)
    c_hat = np.diagflat(c_np)

    # Convert df_trade_pivot to a matrix
    T = df_trade_pivot.values

    # Create a matrix to split T into distinct interconnections - i.e.,
    # prevent trading between the eastern and western interconnects.
    # Connections between the western and eastern interconnects are through
    # SWPP and WAUE.
    logging.info("Matrix operations")
    interconnect = df_trade_pivot.copy()
    interconnect[:] = 1
    interconnect.loc['SWPP', ['EPE', 'PNM', 'PSCO', 'WACM']] = 0
    interconnect.loc['WAUE', ['WAUW', 'WACM']] = 0
    interconnect_mat = interconnect.values
    T_split = np.multiply(T, interconnect_mat)

    # Matrix trading math (see the Qu et al. 2018 ES&T paper)
    x_hat_inv = np.linalg.inv(x_hat)
    B = np.matmul(T_split, x_hat_inv)
    I = np.identity(len(df_net_gen_sum))
    diff_I_B = I - B
    G = np.linalg.inv(diff_I_B)
    c_hat_x_hat_inv = np.matmul(c_hat, x_hat_inv)
    G_c = np.matmul(G, c_hat)
    # H = G @ c_hat @ x_hat_inv. (The previous three-argument call
    # np.matmul(G, c_hat, x_hat_inv) routed x_hat_inv to matmul's `out`
    # parameter, which is not what is intended.)
    H = np.matmul(G_c, x_hat_inv)
    df_G = pd.DataFrame(G)
    df_B = pd.DataFrame(B)
    df_H = pd.DataFrame(H)

    # Convert H to a pandas dataframe, populate the index and columns
    df_final_trade_out = df_H
    df_final_trade_out.columns = df_net_gen_sum.index
    df_final_trade_out.index = df_net_gen_sum.index

    # Develop the trading input for the eLCI code. Need to melt the
    # dataframe to end up with a three-column dataframe; repeat for both
    # possible aggregation levels - BA and FERC market region.
    # Establish a threshold of 0.00001 for inclusion in the final trading
    # matrix; there are lots of really small values as a result of the
    # matrix calculation (e.g., 2.0e-15).
    df_final_trade_out_filt = df_final_trade_out.copy()
    col_list = df_final_trade_out.columns.tolist()
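    # Toy 2-BA example (comment only) of the matrix math above, following
    # Qu et al. (2018). BA1 generates 80 and exports 20 to BA2; BA2
    # generates 40:
    #
    #     T = np.array([[0., 20.], [0., 0.]])  # T[i, j]: exports i -> j
    #     x = np.array([80., 60.])             # inflow = gen + imports
    #     c = np.array([60., 60.])             # consumption = x - exports
    #     B = T @ np.linalg.inv(np.diagflat(x))
    #     G = np.linalg.inv(np.identity(2) - B)
    #     H = G @ np.diagflat(c) @ np.linalg.inv(np.diagflat(x))
    #     # H = [[0.75, 0.333...], [0., 1.]]; each column is later
    #     # normalized by its sum to give the import fractions.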
    # Adding in a filter for balancing authorities that are not associated
    # with any specific plants in EIA860 - there won't be any data for them
    # in the emissions dataframes. We'll set their quantities to 0 so that
    # the consumption mixes are made up of the rest of the incoming
    # balancing authority areas.
    eia860_bas = sorted(
        list(eia860_df["Balancing Authority Code"].dropna().unique())
        + list(df_CA_Imports_Cols.columns)
    )
    keep_rows = [x for x in df_final_trade_out_filt.index if x in eia860_bas]
    keep_cols = [x for x in df_final_trade_out_filt.columns if x in eia860_bas]
    df_final_trade_out_filt = df_final_trade_out_filt.loc[keep_rows, keep_cols]
    col_list = df_final_trade_out_filt.columns.tolist()
    for i in col_list:
        df_final_trade_out_filt[i] = np.where(
            df_final_trade_out_filt[i].abs()
            / df_final_trade_out_filt[i].sum() < 0.00001,
            0,
            df_final_trade_out_filt[i].abs())
    df_final_trade_out_filt = df_final_trade_out_filt.reset_index()
    df_final_trade_out_filt = df_final_trade_out_filt.rename(
        columns={'index': 'Source BAA'})
    df_final_trade_out_filt_melted = df_final_trade_out_filt.melt(
        id_vars='Source BAA', value_vars=col_list)
    df_final_trade_out_filt_melted = df_final_trade_out_filt_melted.rename(
        columns={'Source BAA': 'export BAA', 'variable': 'import BAA'})

    # Merge to bring in the import region name matched with its BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted.merge(
        df_BA_NA, left_on='import BAA', right_on='BA_Acronym')
    df_final_trade_out_filt_melted_merge.rename(
        columns={'FERC_Region': 'import ferc region',
                 'FERC_Region_Abbr': 'import ferc region abbr'},
        inplace=True)
    df_final_trade_out_filt_melted_merge.drop(
        columns=['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region',
                 'EIA_Region_Abbr'],
        inplace=True)

    # Merge to bring in the export region name matched with its BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted_merge.merge(
        df_BA_NA, left_on='export BAA', right_on='BA_Acronym')
    if regions_to_keep is not None:
        # module_logger.info(f"{regions_to_keep}")
        # module_logger.info(f"{df_final_trade_out_filt_melted_merge['BA_Name'].unique()}")
        df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted_merge.loc[
            df_final_trade_out_filt_melted_merge["BA_Name"].isin(
                regions_to_keep), :]
    df_final_trade_out_filt_melted_merge.rename(
        columns={'FERC_Region': 'export ferc region',
                 'FERC_Region_Abbr': 'export ferc region abbr'},
        inplace=True)
    df_final_trade_out_filt_melted_merge.drop(
        columns=['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region',
                 'EIA_Region_Abbr'],
        inplace=True)

    # if subregion == 'BA':
    # Develop the final df at the BAA level
    BAA_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(
        ['import BAA'])['value'].sum().reset_index()
    BAA_final_trade = df_final_trade_out_filt_melted_merge.copy()
    BAA_final_trade = BAA_final_trade.drop(
        columns=['import ferc region', 'export ferc region',
                 'import ferc region abbr', 'export ferc region abbr'])
    BAA_final_trade = BAA_final_trade.merge(BAA_import_grouped_tot,
                                            left_on='import BAA',
                                            right_on='import BAA')
    BAA_final_trade = BAA_final_trade.rename(
        columns={'value_x': 'value', 'value_y': 'total'})
    BAA_final_trade['fraction'] = (BAA_final_trade['value']
                                   / BAA_final_trade['total'])
    BAA_final_trade = BAA_final_trade.fillna(value=0)
    BAA_final_trade = BAA_final_trade.drop(columns=['value', 'total'])

    # Remove Canadian BAs from the import list
    BAA_filt = BAA_final_trade['import BAA'].isin(eia860_bas)
    BAA_final_trade = BAA_final_trade[BAA_filt]

    # There are some BAs that will have 0 trade. Some of these are
    # legitimate - Alcoa Yadkin has no demand (i.e., all power generation
    # is exported) - others seem to be errors. For those BAs with actual
    # demand, we'll set the consumption mix to 100% from that BA. For those
    # without demand, the fraction will be set to near 0 just to make sure
    # systems can be built in openLCA.
    BAA_zero_trade = [
        x for x in list(BAA_final_trade["import BAA"].unique())
        if BAA_final_trade.loc[
            BAA_final_trade["import BAA"] == x, "fraction"].sum() == 0
    ]
    BAAs_from_zero_trade_with_demand = []
    for d_row in DEMAND_ROWS:
        if d_row["series_id"].split('.')[1].split('-')[0] in BAA_zero_trade:
            BAAs_from_zero_trade_with_demand.append(
                d_row["series_id"].split('.')[1].split('-')[0])
    BAAs_from_zero_trade_with_demand = list(
        set(BAAs_from_zero_trade_with_demand))
    del DEMAND_ROWS
    for baa in BAAs_from_zero_trade_with_demand:
        # .loc with a boolean mask (.at only accepts a single label)
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa), "fraction"] = 1
    for baa in list(set(BAA_zero_trade)
                    - set(BAAs_from_zero_trade_with_demand)):
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa), "fraction"] = 1E-15
        # It was later decided not to create consumption mixes for BAs that
        # don't have imports.
        BAA_final_trade.drop(
            BAA_final_trade[BAA_final_trade["import BAA"] == baa].index,
            inplace=True)
    BAA_final_trade.to_csv(output_dir + '/BAA_final_trade_{}.csv'.format(year))
    BAA_final_trade["export_name"] = BAA_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    BAA_final_trade["import_name"] = BAA_final_trade["import BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    # return BAA_final_trade

    # elif subregion == 'FERC':
    # Develop the final df at the FERC market region level
    ferc_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(
        ['import ferc region'])['value'].sum().reset_index()
    ferc_final_trade = df_final_trade_out_filt_melted_merge.copy()
    # ferc_final_trade = ferc_final_trade.groupby(
    #     ['import ferc region abbr', 'import ferc region',
    #      'export ferc region', 'export ferc region abbr']
    # )['value'].sum().reset_index()
    ferc_final_trade = ferc_final_trade.groupby(
        ['import ferc region abbr', 'import ferc region', 'export BAA']
    )['value'].sum().reset_index()
    ferc_final_trade = ferc_final_trade.merge(
        ferc_import_grouped_tot,
        left_on='import ferc region',
        right_on='import ferc region')
    ferc_final_trade = ferc_final_trade.rename(
        columns={'value_x': 'value', 'value_y': 'total'})
    ferc_final_trade['fraction'] = (ferc_final_trade['value']
                                    / ferc_final_trade['total'])
    ferc_final_trade = ferc_final_trade.fillna(value=0)
    ferc_final_trade = ferc_final_trade.drop(columns=['value', 'total'])

    # Remove the Canadian entry from the import list
    ferc_list.remove('CAN')
    ferc_filt = ferc_final_trade['import ferc region abbr'].isin(ferc_list)
    ferc_final_trade = ferc_final_trade[ferc_filt]
    ferc_final_trade.to_csv(
        output_dir + '/ferc_final_trade_{}.csv'.format(year))
    ferc_final_trade["export_name"] = ferc_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    # return ferc_final_trade

    # elif subregion == 'US':
    us_import_grouped_tot = df_final_trade_out_filt_melted_merge['value'].sum()
    us_final_trade = df_final_trade_out_filt_melted_merge.copy()
    us_final_trade = us_final_trade.groupby(
        ['export BAA'])['value'].sum().reset_index()
    us_final_trade["fraction"] = us_final_trade["value"] / us_import_grouped_tot
    us_final_trade = us_final_trade.fillna(value=0)
    us_final_trade = us_final_trade.drop(columns=["value"])
    us_final_trade["export_name"] = us_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"])
    # return us_final_trade
    return {'BA': BAA_final_trade, 'FERC': ferc_final_trade,
            'US': us_final_trade}
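# Illustrative sketch (toy data, pandas only) of the final normalization used
# for each aggregation level above: traded amounts are summed per importing
# region, and each exporter's contribution becomes a fraction of that total.
def _example_trade_fractions():
    import pandas as pd

    trade = pd.DataFrame({
        "import BAA": ["X", "X", "Y"],
        "export BAA": ["X", "Y", "Y"],
        "value": [75.0, 25.0, 60.0],
    })
    totals = (trade.groupby("import BAA", as_index=False)["value"].sum()
              .rename(columns={"value": "total"}))
    trade = trade.merge(totals, on="import BAA")
    trade["fraction"] = trade["value"] / trade["total"]
    # X's consumption mix: 75% from X, 25% from Y; Y's: 100% from Y.
    return trade.drop(columns=["value", "total"])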