def eia_facility_fuel_region(year):
    """Return facility-level primary fuel and region data for the given year.

    Combines EIA-923 primary fuel data with EIA-860 balancing authority
    data and converts the primary fuel share from a percentage to a
    fraction.
    """
    primary_fuel = eia923_primary_fuel(year=year)
    ba_match = eia860_balancing_authority(year)
    primary_fuel["Plant Id"] = primary_fuel["Plant Id"].astype(int)
    ba_match["Plant Id"] = ba_match["Plant Id"].astype(int)
    combined = primary_fuel.merge(ba_match, on="Plant Id")
    combined["primary fuel percent gen"] = (
        combined["primary fuel percent gen"] / 100
    )
    combined.rename(
        columns={
            "primary fuel percent gen": "PercentGenerationfromDesignatedFuelCategory",
            "Plant Id": "FacilityID",
            "fuel category": "FuelCategory",
            "NERC Region": "NERC",
        },
        inplace=True,
    )
    return combined
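# Example usage (a sketch; assumes the EIA-923/860 helper functions above
# can load data for the requested year; the column subset shown is
# illustrative):
#
#     facility_fuel = eia_facility_fuel_region(2016)
#     facility_fuel[["FacilityID", "FuelCategory",
#                    "PercentGenerationfromDesignatedFuelCategory", "NERC"]]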
def build_generation_data(egrid_facilities_to_include=None,
                          generation_years=None):
    """
    Build a dataset of facility-level generation using EIA923. This function
    will apply filters for positive generation, generation efficiency within
    a given range, and a minimum percent of generation from the primary fuel
    (if set in the config file). The returned dataframe also includes the
    balancing authority for every power plant.

    Parameters
    ----------
    egrid_facilities_to_include : list, optional
        List of plant codes to include (default is None, in which case the
        configured filters are applied to all facilities)
    generation_years : list, optional
        Years of generation data to include in the output (default is None,
        which builds a list from the inventories of interest and
        eia_gen_year parameters)

    Returns
    -------
    DataFrame
        Dataframe columns include: ['FacilityID', 'Electricity', 'Year']
    """
    if not generation_years:
        # Use the years from inventories of interest
        generation_years = set(
            list(inventories_of_interest.values()) + [eia_gen_year]
        )
    df_list = []
    for year in generation_years:
        gen_fuel_data = eia923_download_extract(year)
        primary_fuel = eia923_primary_fuel(gen_fuel_data)
        gen_efficiency = calculate_plant_efficiency(gen_fuel_data)
        final_gen_df = gen_efficiency.merge(primary_fuel, on="Plant Id")
        if not egrid_facilities_to_include:
            if include_only_egrid_facilities_with_positive_generation:
                final_gen_df = final_gen_df.loc[
                    final_gen_df["Net Generation (Megawatthours)"] >= 0, :
                ]
            if filter_on_efficiency:
                final_gen_df = efficiency_filter(final_gen_df)
            if (filter_on_min_plant_percent_generation_from_primary_fuel
                    and not keep_mixed_plant_category):
                final_gen_df = final_gen_df.loc[
                    final_gen_df["primary fuel percent gen"]
                    >= min_plant_percent_generation_from_primary_fuel_category,
                    :,
                ]
            # if filter_non_egrid_emission_on_NAICS:
            #     # TODO: confirm the intended NAICS filter
            #     final_gen_df = final_gen_df.loc[
            #         final_gen_df['NAICS Code'] == '22', :
            #     ]
        else:
            final_gen_df = final_gen_df.loc[
                final_gen_df["Plant Id"].isin(egrid_facilities_to_include), :
            ]
        ba_match = eia860_balancing_authority(year)
        ba_match["Plant Id"] = ba_match["Plant Id"].astype(int)
        final_gen_df["Plant Id"] = final_gen_df["Plant Id"].astype(int)
        final_gen_df = final_gen_df.merge(ba_match, on="Plant Id", how="left")
        final_gen_df["Year"] = int(year)
        df_list.append(final_gen_df)
    all_years_gen = pd.concat(df_list)
    all_years_gen = all_years_gen.rename(
        columns={
            "Plant Id": "FacilityID",
            "Net Generation (Megawatthours)": "Electricity",
        }
    )
    all_years_gen = all_years_gen.loc[:, ["FacilityID", "Electricity", "Year"]]
    all_years_gen.reset_index(drop=True, inplace=True)
    all_years_gen["Year"] = all_years_gen["Year"].astype("int32")
    return all_years_gen
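# Example usage (a sketch; assumes the EIA-923 data for the requested year
# can be downloaded or read from cache, and that the module-level config
# flags referenced above are set):
#
#     gen = build_generation_data(generation_years=[2016])
#     # gen has columns: FacilityID, Electricity, Year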
def generate_hydro_emissions():
    """
    Generate a dataframe of hydro power plant emissions using data from an
    analysis performed at NETL that estimates biogenic reservoir emissions.
    The reservoir emissions are allocated among all uses for the reservoir
    (e.g., boating, fishing, etc.) using market-based allocation. The year
    for the inventory is fixed to 2016.

    Returns
    -------
    DataFrame
        Dataframe populated with CO2 and CH4 emissions as well as
        consumptive water use induced by the hydroelectric generator.
    """
    DATA_FILE = "hydropower_plant.csv"
    hydro_df = pd.read_csv(
        f"{data_dir}/{DATA_FILE}", index_col=0, low_memory=False
    )
    hydro_df.rename(
        columns={
            "Plant Id": "FacilityID",
            "Annual Net Generation (MWh)": "Electricity",
        },
        inplace=True,
    )
    FLOW_DICTIONARY = {
        "co2 (kg)": {
            "FlowName": "Carbon dioxide",
            "FlowUUID": "b6f010fb-a764-3063-af2d-bcb8309a97b7",
            "Compartment_path": "emission/air",
            "Compartment": "air",
        },
        "ch4 (kg)": {
            "FlowName": "Methane",
            "FlowUUID": "aab83476-ec6c-3742-af85-15d320b7ce80",
            "Compartment_path": "emission/air",
            "Compartment": "air",
        },
        "water use (m3)": {
            "FlowName": "Water, fresh",
            "FlowUUID": "8ba7dd57-b502-397b-944a-f63c6615f754",
            "Compartment_path": "resource/water",
            "Compartment": "input",
        },
    }
    hydro_df.drop(columns=["co2e (kg)"], inplace=True)
    hydro_df = hydro_df.melt(
        id_vars=["FacilityID", "Plant Name", "NERC Region", "Electricity"],
        var_name="flow",
    )
    hydro_df.rename(
        columns={"value": "FlowAmount", "NERC Region": "NERC"}, inplace=True
    )
    hydro_df["FlowDict"] = hydro_df["flow"].map(FLOW_DICTIONARY)
    hydro_df = pd.concat(
        [hydro_df, hydro_df["FlowDict"].apply(pd.Series)], axis=1
    )
    hydro_df["Year"] = 2016
    hydro_df["Source"] = "netl"
    eia860_df = eia860_balancing_authority(2016)
    eia860_df["Plant Id"] = eia860_df["Plant Id"].astype(int)
    hydro_df = hydro_df.merge(
        eia860_df,
        left_on="FacilityID",
        right_on="Plant Id",
        suffixes=["", "_eia"],
    )
    hydro_df.drop(
        columns=["FlowDict", "flow", "NERC Region", "Plant Id"], inplace=True
    )
    # Convert water use from m3 to kg (1 m3 of fresh water is 1000 kg)
    water_filter = hydro_df["FlowName"] == "Water, fresh"
    hydro_df.loc[water_filter, "FlowAmount"] = (
        hydro_df.loc[water_filter, "FlowAmount"] * 1000
    )
    hydro_df["Unit"] = "kg"
    hydro_df["stage_code"] = "Power plant"
    hydro_df["TechnologicalCorrelation"] = 1
    hydro_df["GeographicalCorrelation"] = 1
    hydro_df["TemporalCorrelation"] = 1
    hydro_df["DataCollection"] = 5
    hydro_df["ReliabilityScore"] = 1
    hydro_df["eGRID_ID"] = hydro_df["FacilityID"]
    hydro_df["FuelCategory"] = "HYDRO"
    hydro_df["PrimaryFuel"] = "WAT"
    hydro_df["quantity"] = hydro_df["Electricity"]
    hydro_df["ElementaryFlowPrimeContext"] = "emission"
    hydro_df["input"] = False
    hydro_df.loc[water_filter, "ElementaryFlowPrimeContext"] = "resource"
    hydro_df.loc[water_filter, "input"] = True
    hydro_df["plant_id"] = hydro_df["FacilityID"]
    hydro_df["Compartment"] = hydro_df["Compartment_path"]
    return hydro_df
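# The melt-and-map pattern used above, shown on a toy frame: each flow
# column becomes rows, and a dictionary of flow metadata is expanded into
# new columns. (The toy names and values here are illustrative, not from
# the NETL data.)
#
#     import pandas as pd
#     toy = pd.DataFrame({"FacilityID": [1], "co2 (kg)": [10.0]})
#     melted = toy.melt(id_vars=["FacilityID"], var_name="flow",
#                       value_name="FlowAmount")
#     meta = {"co2 (kg)": {"FlowName": "Carbon dioxide"}}
#     melted["FlowDict"] = melted["flow"].map(meta)
#     pd.concat([melted, melted["FlowDict"].apply(pd.Series)], axis=1)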
def fill_nans(df, key_column="FacilityID", target_columns=None, dropna=True):
    """Fill nan values in the target columns using the data from other rows,
    matching on key_column. An extra step fills remaining nans in the state
    column because the module that calculates transmission and distribution
    losses requires values in the state column.

    Parameters
    ----------
    df : dataframe
        Dataframe containing nans and, at a minimum, the key_column and
        target_columns
    key_column : str, optional
        The column to match on when filling the target columns,
        by default "FacilityID"
    target_columns : list, optional
        A list of columns with nans to fill, by default None. If empty or
        None, the function uses a pre-defined set of columns.
    dropna : bool, optional
        After nans are filled, drop rows that still contain nans in the
        target columns, by default True

    Returns
    -------
    dataframe
        The input dataframe with nans filled; rows with remaining nans in
        the target columns are dropped when dropna is True.
    """
    from electricitylci.eia860_facilities import eia860_balancing_authority

    if not target_columns:
        target_columns = [
            "Balancing Authority Code",
            "Balancing Authority Name",
            "FuelCategory",
            "NERC",
            "PercentGenerationfromDesignatedFuelCategory",
            "eGRID_ID",
            "Subregion",
            "FERC_Region",
            "EIA_Region",
            "State",
            "Electricity",
        ]
    confirmed_target = []
    for x in target_columns:
        if x in df.columns:
            confirmed_target.append(x)
        else:
            module_logger.warning(f"Column {x} is not in the dataframe")
    if key_column not in df.columns:
        module_logger.warning(
            f"Key column '{key_column}' is not in the dataframe"
        )
        raise KeyError(key_column)
    for col in confirmed_target:
        # Build a key -> value lookup from rows where the column is
        # populated, then map it onto rows where the column is null.
        key_df = (
            df[[key_column, col]]
            .dropna()
            .drop_duplicates(subset=key_column)
            .set_index(key_column)
        )
        df.loc[df[col].isnull(), col] = df.loc[
            df[col].isnull(), key_column
        ].map(key_df[col])
    plant_ba = eia860_balancing_authority(eia_gen_year).set_index("Plant Id")
    plant_ba.index = plant_ba.index.astype(int)
    if "State" not in df.columns:
        df["State"] = float("nan")
        confirmed_target.append("State")
    df.loc[df["State"].isna(), "State"] = df.loc[
        df["State"].isna(), "eGRID_ID"
    ].map(plant_ba["State"])
    if dropna:
        df.dropna(subset=confirmed_target, inplace=True)
    return df
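# The core fill pattern used by fill_nans, on a toy frame: build a
# key -> value lookup from rows where the column is populated, then map it
# onto the rows where the column is null. (Toy values are illustrative.)
#
#     import pandas as pd
#     toy = pd.DataFrame({"FacilityID": [1, 1, 2, 2],
#                         "State": ["PA", None, None, "OH"]})
#     lookup = (toy.dropna()
#                  .drop_duplicates(subset="FacilityID")
#                  .set_index("FacilityID"))
#     toy.loc[toy["State"].isnull(), "State"] = toy.loc[
#         toy["State"].isnull(), "FacilityID"].map(lookup["State"])
#     # toy["State"] is now ["PA", "PA", "OH", "OH"]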
def ba_io_trading_model(year=None, subregion=None, regions_to_keep=None):
    """Calculate the consumption mix for each balancing authority area
    (BAA), FERC market region, and the US using an input-output approach
    applied to EIA bulk trading data, following Qu et al. (2018).

    Returns a dictionary of dataframes keyed by 'BA', 'FERC', and 'US'.
    """
    REGION_NAMES = [
        'California', 'Carolinas', 'Central',
        'Electric Reliability Council of Texas, Inc.', 'Florida',
        'Mid-Atlantic', 'Midwest', 'New England ISO',
        'New York Independent System Operator', 'Northwest', 'Southeast',
        'Southwest', 'Tennessee Valley Authority'
    ]
    REGION_ACRONYMS = [
        'TVA', 'MIDA', 'CAL', 'CAR', 'CENT', 'ERCO', 'FLA',
        'MIDW', 'ISNE', 'NYIS', 'NW', 'SE', 'SW',
    ]
    if year is None:
        year = model_specs.NETL_IO_trading_year
    if subregion is None:
        subregion = model_specs.regional_aggregation
    if subregion not in ['BA', 'FERC', 'US']:
        raise ValueError(
            f'subregion or regional_aggregation must have a value of "BA", '
            f'"FERC", or "US" when calculating trading with input-output, '
            f'not {subregion}'
        )
    # Read in the BAA file, which contains the names and abbreviations
    df_BA = pd.read_excel(
        data_dir + '/BA_Codes_930.xlsx', sheet_name='US', header=4
    )
    df_BA.rename(
        columns={
            'etag ID': 'BA_Acronym',
            'Entity Name': 'BA_Name',
            'NCR_ID#': 'NRC_ID',
            'Region': 'Region',
        },
        inplace=True,
    )
    BA = np.array(df_BA['BA_Acronym'])
    US_BA_acronyms = df_BA['BA_Acronym'].tolist()
    # The US sheet does not include the Canadian balancing authorities.
    # Import them here, then concatenate to make a single df_BA_NA
    # (North America).
    df_BA_CA = pd.read_excel(
        data_dir + '/BA_Codes_930.xlsx', sheet_name='Canada', header=4
    )
    df_BA_CA.rename(
        columns={
            'etag ID': 'BA_Acronym',
            'Entity Name': 'BA_Name',
            'NCR_ID#': 'NRC_ID',
            'Region': 'Region',
        },
        inplace=True,
    )
    df_BA_NA = pd.concat([df_BA, df_BA_CA])
    ferc_list = df_BA_NA['FERC_Region_Abbr'].unique().tolist()

    # Read in the bulk data, downloading it first if necessary
    path = join(data_dir, 'bulk_data', 'EBA.zip')
    NET_GEN_ROWS = []
    BA_TO_BA_ROWS = []
    DEMAND_ROWS = []
    try:
        logging.info("Using existing bulk data download")
        z = zipfile.ZipFile(path, 'r')
    except FileNotFoundError:
        logging.info("Downloading new bulk data")
        download_EBA()
        z = zipfile.ZipFile(path, 'r')
    logging.info("Loading bulk data to json")
    with z.open('EBA.txt') as f:
        for line in f:
            # All but one BA report net generation in both UTC and local
            # time; that one BA (GRMA) reports only UTC, so only the UTC
            # series are pulled here.
            if b'EBA.NG.H' in line and b'EBA.NG.HL' not in line:
                NET_GEN_ROWS.append(json.loads(line))
            # Similarly, five interchanges report in UTC but not in
            # local time.
            elif b'EBA.ID.H' in line and b'EBA.ID.HL' not in line:
                exchange_line = json.loads(line)
                # Skip series reported at the aggregate region level; keep
                # only BA-to-BA exchanges.
                if exchange_line['series_id'].split('-')[0][4:] not in REGION_ACRONYMS:
                    BA_TO_BA_ROWS.append(exchange_line)
            elif b'EBA.D.H' in line and b'EBA.D.HL' not in line:
                DEMAND_ROWS.append(json.loads(line))
    logging.info(
        f"Net gen rows: {len(NET_GEN_ROWS)}; "
        f"BA to BA rows: {len(BA_TO_BA_ROWS)}; "
        f"Demand rows: {len(DEMAND_ROWS)}"
    )

    eia923_gen = eia923.build_generation_data(generation_years=[year])
    eia860_df = eia860.eia860_balancing_authority(year)
    eia860_df["Plant Id"] = eia860_df["Plant Id"].astype(int)
    eia_combined_df = eia923_gen.merge(
        eia860_df, left_on=["FacilityID"], right_on=["Plant Id"], how="left"
    )
    eia_gen_ba = eia_combined_df.groupby(
        by=["Balancing Authority Code"], as_index=False
    )["Electricity"].sum()

    # Subset for the specified year
    start_datetime = '{}-01-01 00:00:00+00:00'.format(year)
    end_datetime = '{}-12-31 23:00:00+00:00'.format(year)
    start_datetime = datetime.strptime(start_datetime, '%Y-%m-%d %H:%M:%S%z')
    end_datetime = datetime.strptime(end_datetime, '%Y-%m-%d %H:%M:%S%z')

    # Net generation data import
    logging.info("Generating df with datetime")
    df_net_gen = row_to_df(NET_GEN_ROWS, 'net_gen')
    del NET_GEN_ROWS
    logging.info("Pivoting")
    df_net_gen = df_net_gen.pivot(
        index='datetime', columns='region', values='net_gen'
    )
    ba_cols = US_BA_acronyms
    gen_cols = list(df_net_gen.columns.values)
    gen_cols_set = set(gen_cols)
    ba_ref_set = set(ba_cols)
    col_diff = list(ba_ref_set - gen_cols_set)
    col_diff.sort(key=str.upper)
    logging.info("Cleaning net_gen dataframe")
    # Add in missing columns, then sort in alphabetical order
    for i in col_diff:
        df_net_gen[i] = 0
    # Keep only the columns that match the balancing authority names;
    # several other columns in the dataset represent states (e.g., TEX,
    # NY, FL) and other areas (US48)
    df_net_gen = df_net_gen[ba_cols]
    # Re-sort columns so the headers are in alphabetical order
    df_net_gen = df_net_gen.sort_index(axis=1)
    df_net_gen = df_net_gen.fillna(value=0)
    df_net_gen = df_net_gen.loc[start_datetime:end_datetime]
    # Sum values in each column
    df_net_gen_sum = df_net_gen.sum(axis=0).to_frame()
    logging.info("Reading Canadian import data")
    # Add Canadian import data to the net generation dataset, concatenate
    # and put in alphabetical order
    df_CA_Imports_Gen = pd.read_csv(
        data_dir + '/CA_Imports_Gen.csv', index_col=0
    )
    df_CA_Imports_Gen = df_CA_Imports_Gen[str(year)]
    logging.info("Combining US and Canadian net gen data")
    df_net_gen_sum = pd.concat([df_net_gen_sum, df_CA_Imports_Gen]).sum(axis=1)
    df_net_gen_sum = df_net_gen_sum.to_frame()
    df_net_gen_sum = df_net_gen_sum.sort_index(axis=0)
    # Check the net generation of each balancing authority against the
    # EIA-923 data. If the relative difference for a given area is greater
    # than the mean absolute deviation across all areas, treat it as an
    # error and replace the value with the EIA-923 value.
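    # Worked example of the check below (values are illustrative): if the
    # summed EBA net generation for a BA is 1.0e6 MWh while EIA-923 reports
    # 1.5e6 MWh, the relative difference is
    # |1.5e6 - 1.0e6| / 1.0e6 = 0.5. If 0.5 exceeds the mean absolute
    # deviation of the relative differences across all BAs, the EBA total
    # is discarded in favor of the EIA-923 value.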
logging.info("Checking against EIA 923 generation data") net_gen_check=df_net_gen_sum.merge( right=eia_gen_ba, left_index=True, right_on=["Balancing Authority Code"], how="left" ).reset_index() net_gen_check["diff"]=abs(net_gen_check["Electricity"]-net_gen_check[0])/net_gen_check[0] diff_mad=net_gen_check["diff"].mad() net_gen_swap=net_gen_check.loc[net_gen_check["diff"]>diff_mad,["Balancing Authority Code","Electricity"]].set_index("Balancing Authority Code") df_net_gen_sum.loc[net_gen_swap.index,[0]]=np.nan net_gen_swap.rename(columns={"Electricity":0},inplace=True) df_net_gen_sum=df_net_gen_sum.combine_first(net_gen_swap) # First work on the trading data from the 'df_trade_all_stack_2016' frame # This cell does the following: # 1. reformats the data to an annual basis # 2. formats the BA names in the corresponding columns # 3. evalutes the trade values from both BA perspectives # (e.g. BA1 as exporter and importer in a transaction with BA2) # 4. evaluates the trading data for any results that don't make sense # a. both BAs designate as importers (negative value) # b. both BAs designate as exporters (postive value) # c. one of the BAs in the transation reports a zero value and the other is nonzero # 5. calulate the percent difference in the transaction values reports by BAs # 6. final exchange value based on logic; # a. if percent diff is less than 20%, take mean, # b. if not use the value as reported by the exporting BAA # c. designate each BA in the transaction either as the importer or exporter # Output is a pivot with index (rows) representing exporting BAs, # columns representing importing BAs, and values for the traded amount # Group and resample trading data so that it is on an annual basis logging.info("Creating trading dataframe") df_ba_trade = ba_exchange_to_df(BA_TO_BA_ROWS, data_type='ba_to_ba') del(BA_TO_BA_ROWS) df_ba_trade = df_ba_trade.set_index('datetime') df_ba_trade['transacting regions'] = df_ba_trade['from_region'] + '-' + df_ba_trade['to_region'] logging.info("Filtering trading dataframe") # Keep only the columns that match the balancing authority names, there are several other columns included in the dataset # that represent states (e.g., TEX, NY, FL) and other areas (US48) filt1 = df_ba_trade['from_region'].isin(ba_cols) filt2 = df_ba_trade['to_region'].isin(ba_cols) filt = filt1 & filt2 df_ba_trade = df_ba_trade[filt] # Subset for eia_gen_year, need to pivot first because of non-unique datetime index df_ba_trade_pivot = df_ba_trade.pivot(columns = 'transacting regions', values = 'ba_to_ba') df_ba_trade_pivot = df_ba_trade_pivot.loc[start_datetime:end_datetime] # Sum columns - represents the net transactced amount between the two BAs df_ba_trade_sum = df_ba_trade_pivot.sum(axis = 0).to_frame() df_ba_trade_sum = df_ba_trade_sum.reset_index() df_ba_trade_sum.columns = ['BAAs','Exchange'] # Split BAA string into exporting and importing BAA columns df_ba_trade_sum['BAA1'], df_ba_trade_sum['BAA2'] = df_ba_trade_sum['BAAs'].str.split('-', 1).str df_ba_trade_sum = df_ba_trade_sum.rename(columns={'BAAs': 'Transacting BAAs'}) # Create two perspectives - import and export to use for comparison in selection of the final exchange value between the BAAs df_trade_sum_1_2 = df_ba_trade_sum.groupby(['BAA1', 'BAA2','Transacting BAAs'], as_index=False)[['Exchange']].sum() df_trade_sum_2_1 = df_ba_trade_sum.groupby(['BAA2', 'BAA1', 'Transacting BAAs'], as_index=False)[['Exchange']].sum() df_trade_sum_1_2.columns = ['BAA1_1_2', 'BAA2_1_2','Transacting BAAs_1_2', 'Exchange_1_2'] 
df_trade_sum_2_1.columns = ['BAA2_2_1', 'BAA1_2_1','Transacting BAAs_2_1', 'Exchange_2_1'] # Combine two grouped tables for comparison for exchange values df_concat_trade = pd.concat([df_trade_sum_1_2,df_trade_sum_2_1], axis = 1) df_concat_trade['Exchange_1_2_abs'] = df_concat_trade['Exchange_1_2'].abs() df_concat_trade['Exchange_2_1_abs'] = df_concat_trade['Exchange_2_1'].abs() # Create new column to check if BAAs designate as either both exporters or both importers # or if one of the entities in the transaction reports a zero value # Drop combinations where any of these conditions are true, keep everything else df_concat_trade['Status_Check'] = np.where(((df_concat_trade['Exchange_1_2'] > 0) & (df_concat_trade['Exchange_2_1'] > 0)) \ |((df_concat_trade['Exchange_1_2'] < 0) & (df_concat_trade['Exchange_2_1'] < 0)) \ | ((df_concat_trade['Exchange_1_2'] == 0) | (df_concat_trade['Exchange_2_1'] == 0)), 'drop', 'keep') # Calculate the difference in exchange values df_concat_trade['Delta'] = df_concat_trade['Exchange_1_2_abs'] - df_concat_trade['Exchange_2_1_abs'] # Calculate percent diff of exchange_abs values - this can be down two ways: # relative to 1_2 exchange or relative to 2_1 exchange - perform the calc both ways # and take the average df_concat_trade['Percent_Diff_Avg']= ((abs((df_concat_trade['Exchange_1_2_abs']/df_concat_trade['Exchange_2_1_abs'])-1)) \ + (abs((df_concat_trade['Exchange_2_1_abs']/df_concat_trade['Exchange_1_2_abs'])-1)))/2 # Mean exchange value df_concat_trade['Exchange_mean'] = df_concat_trade[['Exchange_1_2_abs', 'Exchange_2_1_abs']].mean(axis=1) # Percent diff equations creats NaN where both values are 0, fill with 0 df_concat_trade['Percent_Diff_Avg'].fillna(0, inplace = True) # Final exchange value based on logic; if percent diff is less than 20%, take mean, # if not use the value as reported by the exporting BAA. 
First figure out which BAA is the exporter # by checking the value of the Exchance_1_2 # If that value is positive, it indicates that BAA1 is exported to BAA2; if negative, use the # value from Exchange_2_1 df_concat_trade['Final_Exchange'] = np.where((df_concat_trade['Percent_Diff_Avg'].abs() < 0.2), df_concat_trade['Exchange_mean'],np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['Exchange_1_2'],df_concat_trade['Exchange_2_1'])) # Assign final designation of BAA as exporter or importer based on logical assignment df_concat_trade['Export_BAA'] = np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['BAA1_1_2'], np.where((df_concat_trade['Exchange_1_2'] < 0), df_concat_trade['BAA2_1_2'],'')) df_concat_trade['Import_BAA'] = np.where((df_concat_trade['Exchange_1_2'] < 0), df_concat_trade['BAA1_1_2'], np.where((df_concat_trade['Exchange_1_2'] > 0), df_concat_trade['BAA2_1_2'],'')) df_concat_trade = df_concat_trade[df_concat_trade['Status_Check'] == 'keep'] # Create the final trading matrix; first grab the necessary columns, rename the columns and then pivot df_concat_trade_subset = df_concat_trade[['Export_BAA', 'Import_BAA', 'Final_Exchange']] df_concat_trade_subset.columns = ['Exporting_BAA', 'Importing_BAA', 'Amount'] df_trade_pivot = df_concat_trade_subset.pivot_table(index = 'Exporting_BAA', columns = 'Importing_BAA', values = 'Amount').fillna(0) # This cell continues formatting the df_trade # Find missing BAs - need to add them in so that we have a square matrix # Not all BAs are involved in transactions trade_cols = list(df_trade_pivot.columns.values) trade_rows = list(df_trade_pivot.index.values) trade_cols_set = set(trade_cols) trade_rows_set = set(trade_rows) trade_ba_ref_set = set(ba_cols) trade_col_diff = list(trade_ba_ref_set - trade_cols_set) trade_col_diff.sort(key = str.upper) trade_row_diff = list(trade_ba_ref_set - trade_rows_set) trade_row_diff.sort(key=str.upper) # Add in missing columns, then sort in alphabetical order for i in trade_col_diff: df_trade_pivot[i] = 0 df_trade_pivot = df_trade_pivot.sort_index(axis=1) # Add in missing rows, then sort in alphabetical order for i in trade_row_diff: df_trade_pivot.loc[i,:] = 0 df_trade_pivot = df_trade_pivot.sort_index(axis=0) # Add Canadian Imports to the trading matrix # CA imports are specified in an external file df_CA_Imports_Cols = pd.read_csv(data_dir + '/CA_Imports_Cols.csv', index_col = 0) df_CA_Imports_Rows = pd.read_csv(data_dir + '/CA_Imports_Rows.csv', index_col = 0) df_CA_Imports_Rows = df_CA_Imports_Rows[['us_ba', str(year)]] df_CA_Imports_Rows = df_CA_Imports_Rows.pivot(columns = 'us_ba', values = str(year)) df_concat_trade_CA = pd.concat([df_trade_pivot, df_CA_Imports_Rows]) df_concat_trade_CA = pd.concat([df_concat_trade_CA, df_CA_Imports_Cols], axis = 1) df_concat_trade_CA.fillna(0, inplace = True) df_trade_pivot = df_concat_trade_CA df_trade_pivot = df_trade_pivot.sort_index(axis=0) df_trade_pivot = df_trade_pivot.sort_index(axis=1) # Perform trading calculations as provided in Qu et al (2018) to # determine the composition of a BA consumption mix # Create total inflow vector x and then convert to a diagonal matrix x-hat logging.info("Inflow vector") x = [] for i in range (len(df_net_gen_sum)): x.append(df_net_gen_sum.iloc[i] + df_trade_pivot.sum(axis = 0).iloc[i]) x_np = np.array(x) # If values are zero, x_hat matrix will be singular, set BAAs with 0 to small value (1) df_x = pd.DataFrame(data = x_np, index = df_trade_pivot.index) df_x = df_x.rename(columns = {0:'inflow'}) 
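    # Notation for the block below, following Qu et al. (2018): let T[i, j]
    # be the energy traded from exporting BA i to importing BA j, and g the
    # vector of net generation. The total inflow to each BA is x = g + T'1
    # (own generation plus imports), and consumption is c = x - T1 (inflow
    # minus exports). With x_hat and c_hat the diagonal matrices of x and
    # c, the direct-trade matrix is B = T @ inv(x_hat), the
    # total-requirements matrix is G = inv(I - B), and the consumption-mix
    # matrix is H = G @ c_hat @ inv(x_hat), whose entry [i, j] is read here
    # as the contribution of BA i's generation to BA j's consumption. (The
    # code later normalizes each import column to fractions, so any uniform
    # column scaling cancels out.)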
    # Create the total inflow vector x, then convert it to the diagonal
    # matrix x-hat
    logging.info("Inflow vector")
    x = []
    for i in range(len(df_net_gen_sum)):
        x.append(df_net_gen_sum.iloc[i] + df_trade_pivot.sum(axis=0).iloc[i])
    x_np = np.array(x)
    # If any value is zero, the x_hat matrix will be singular; set BAAs
    # with 0 inflow to a small value (1)
    df_x = pd.DataFrame(data=x_np, index=df_trade_pivot.index)
    df_x = df_x.rename(columns={0: 'inflow'})
    df_x.loc[df_x['inflow'] == 0] = 1
    x_np = df_x.values
    x_hat = np.diagflat(x_np)
    # Create the consumption vector c, then convert it to the diagonal
    # matrix c-hat; calculate c based on x and T
    logging.info("consumption vector")
    c = []
    for i in range(len(df_net_gen_sum)):
        c.append(x[i] - df_trade_pivot.sum(axis=1).iloc[i])
    c_np = np.array(c)
    c_hat = np.diagflat(c_np)
    # Convert df_trade_pivot to a matrix
    T = df_trade_pivot.values
    # Create a matrix to split T into distinct interconnections - i.e.,
    # prevent trading between the Eastern and Western Interconnections.
    # Connections between the Western and Eastern Interconnections are
    # through SWPP and WAUE.
    logging.info("Matrix operations")
    interconnect = df_trade_pivot.copy()
    interconnect[:] = 1
    interconnect.loc['SWPP', ['EPE', 'PNM', 'PSCO', 'WACM']] = 0
    interconnect.loc['WAUE', ['WAUW', 'WACM']] = 0
    interconnect_mat = interconnect.values
    T_split = np.multiply(T, interconnect_mat)
    # Matrix trading math (see the Qu et al. 2018 ES&T paper)
    x_hat_inv = np.linalg.inv(x_hat)
    B = np.matmul(T_split, x_hat_inv)
    I = np.identity(len(df_net_gen_sum))
    diff_I_B = I - B
    G = np.linalg.inv(diff_I_B)
    c_hat_x_hat_inv = np.matmul(c_hat, x_hat_inv)
    G_c = np.matmul(G, c_hat)
    # H = G @ c_hat @ x_hat_inv; note that np.matmul takes only two
    # operands (a third positional argument would be treated as the `out`
    # array), so the product is chained through c_hat_x_hat_inv
    H = np.matmul(G, c_hat_x_hat_inv)
    df_G = pd.DataFrame(G)
    df_B = pd.DataFrame(B)
    df_H = pd.DataFrame(H)
    # Convert H to a pandas dataframe and populate the index and columns
    df_final_trade_out = df_H
    df_final_trade_out.columns = df_net_gen_sum.index
    df_final_trade_out.index = df_net_gen_sum.index
    # Develop the trading input for the eLCI code. The dataframe needs to
    # be melted to end up with three columns; this is repeated for each
    # aggregation level - BA and FERC market region.
    # Establish a threshold of 0.00001 for inclusion in the final trading
    # matrix; the matrix calculation produces lots of really small values
    # (e.g., 2.0e-15)
    df_final_trade_out_filt = df_final_trade_out.copy()
    col_list = df_final_trade_out.columns.tolist()
    # Filter out balancing authorities that are not associated with any
    # specific plants in EIA860 - there won't be any data for them in the
    # emissions dataframes. Set their quantities to 0 so that the
    # consumption mixes are made up of the rest of the incoming balancing
    # authority areas.
    eia860_bas = sorted(
        list(eia860_df["Balancing Authority Code"].dropna().unique())
        + list(df_CA_Imports_Cols.columns)
    )
    keep_rows = [x for x in df_final_trade_out_filt.index if x in eia860_bas]
    keep_cols = [x for x in df_final_trade_out_filt.columns if x in eia860_bas]
    df_final_trade_out_filt = df_final_trade_out_filt.loc[keep_rows, keep_cols]
    col_list = df_final_trade_out_filt.columns.tolist()
    for i in col_list:
        df_final_trade_out_filt[i] = np.where(
            df_final_trade_out_filt[i].abs()
            / df_final_trade_out_filt[i].sum() < 0.00001,
            0,
            df_final_trade_out_filt[i].abs(),
        )
    df_final_trade_out_filt = df_final_trade_out_filt.reset_index()
    df_final_trade_out_filt = df_final_trade_out_filt.rename(
        columns={'index': 'Source BAA'}
    )
    df_final_trade_out_filt_melted = df_final_trade_out_filt.melt(
        id_vars='Source BAA', value_vars=col_list
    )
    df_final_trade_out_filt_melted = df_final_trade_out_filt_melted.rename(
        columns={'Source BAA': 'export BAA', 'variable': 'import BAA'}
    )
    # Merge to bring in the import region name matched with each BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted.merge(
        df_BA_NA, left_on='import BAA', right_on='BA_Acronym'
    )
    df_final_trade_out_filt_melted_merge.rename(
        columns={
            'FERC_Region': 'import ferc region',
            'FERC_Region_Abbr': 'import ferc region abbr',
        },
        inplace=True,
    )
    df_final_trade_out_filt_melted_merge.drop(
        columns=['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region',
                 'EIA_Region_Abbr'],
        inplace=True,
    )
    # Merge to bring in the export region name matched with each BAA
    df_final_trade_out_filt_melted_merge = df_final_trade_out_filt_melted_merge.merge(
        df_BA_NA, left_on='export BAA', right_on='BA_Acronym'
    )
    if regions_to_keep is not None:
        df_final_trade_out_filt_melted_merge = (
            df_final_trade_out_filt_melted_merge.loc[
                df_final_trade_out_filt_melted_merge["BA_Name"].isin(
                    regions_to_keep
                ),
                :,
            ]
        )
    df_final_trade_out_filt_melted_merge.rename(
        columns={
            'FERC_Region': 'export ferc region',
            'FERC_Region_Abbr': 'export ferc region abbr',
        },
        inplace=True,
    )
    df_final_trade_out_filt_melted_merge.drop(
        columns=['BA_Acronym', 'BA_Name', 'NCR ID#', 'EIA_Region',
                 'EIA_Region_Abbr'],
        inplace=True,
    )

    # Develop the final dataframe for BAA-level aggregation
    BAA_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(
        ['import BAA']
    )['value'].sum().reset_index()
    BAA_final_trade = df_final_trade_out_filt_melted_merge.copy()
    BAA_final_trade = BAA_final_trade.drop(
        columns=['import ferc region', 'export ferc region',
                 'import ferc region abbr', 'export ferc region abbr']
    )
    BAA_final_trade = BAA_final_trade.merge(
        BAA_import_grouped_tot, left_on='import BAA', right_on='import BAA'
    )
    BAA_final_trade = BAA_final_trade.rename(
        columns={'value_x': 'value', 'value_y': 'total'}
    )
    BAA_final_trade['fraction'] = (
        BAA_final_trade['value'] / BAA_final_trade['total']
    )
    BAA_final_trade = BAA_final_trade.fillna(value=0)
    BAA_final_trade = BAA_final_trade.drop(columns=['value', 'total'])
    # Remove Canadian BAs from the import list
    BAA_filt = BAA_final_trade['import BAA'].isin(eia860_bas)
    BAA_final_trade = BAA_final_trade[BAA_filt]
    # Some BAs will have zero trade. Some of these are legitimate - Alcoa
    # Yadkin has no demand (i.e., all power generation is exported) - while
    # others appear to be errors. For those BAs with actual demand, set the
    # consumption mix to 100% from that BA. For those without demand, set
    # the fraction to near 0 just so that systems can be built in openLCA.
    BAA_zero_trade = [
        x for x in list(BAA_final_trade["import BAA"].unique())
        if BAA_final_trade.loc[
            BAA_final_trade["import BAA"] == x, "fraction"
        ].sum() == 0
    ]
    BAAs_from_zero_trade_with_demand = []
    for d_row in DEMAND_ROWS:
        if d_row["series_id"].split('.')[1].split('-')[0] in BAA_zero_trade:
            BAAs_from_zero_trade_with_demand.append(
                d_row["series_id"].split('.')[1].split('-')[0]
            )
    BAAs_from_zero_trade_with_demand = list(
        set(BAAs_from_zero_trade_with_demand)
    )
    del DEMAND_ROWS
    for baa in BAAs_from_zero_trade_with_demand:
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa),
            "fraction",
        ] = 1
    for baa in list(
        set(BAA_zero_trade) - set(BAAs_from_zero_trade_with_demand)
    ):
        BAA_final_trade.loc[
            (BAA_final_trade["import BAA"] == baa)
            & (BAA_final_trade["export BAA"] == baa),
            "fraction",
        ] = 1E-15
        # It was later decided not to create consumption mixes for BAs
        # that have no imports, so drop these BAs entirely.
        BAA_final_trade.drop(
            BAA_final_trade[BAA_final_trade["import BAA"] == baa].index,
            inplace=True,
        )
    BAA_final_trade.to_csv(output_dir + '/BAA_final_trade_{}.csv'.format(year))
    BAA_final_trade["export_name"] = BAA_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"]
    )
    BAA_final_trade["import_name"] = BAA_final_trade["import BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"]
    )

    # Develop the final dataframe for FERC market region aggregation
    ferc_import_grouped_tot = df_final_trade_out_filt_melted_merge.groupby(
        ['import ferc region']
    )['value'].sum().reset_index()
    ferc_final_trade = df_final_trade_out_filt_melted_merge.copy()
    ferc_final_trade = ferc_final_trade.groupby(
        ['import ferc region abbr', 'import ferc region', 'export BAA']
    )['value'].sum().reset_index()
    ferc_final_trade = ferc_final_trade.merge(
        ferc_import_grouped_tot,
        left_on='import ferc region',
        right_on='import ferc region',
    )
    ferc_final_trade = ferc_final_trade.rename(
        columns={'value_x': 'value', 'value_y': 'total'}
    )
    ferc_final_trade['fraction'] = (
        ferc_final_trade['value'] / ferc_final_trade['total']
    )
    ferc_final_trade = ferc_final_trade.fillna(value=0)
    ferc_final_trade = ferc_final_trade.drop(columns=['value', 'total'])
    # Remove the Canadian entry from the import list
    ferc_list.remove('CAN')
    ferc_filt = ferc_final_trade['import ferc region abbr'].isin(ferc_list)
    ferc_final_trade = ferc_final_trade[ferc_filt]
    ferc_final_trade.to_csv(
        output_dir + '/ferc_final_trade_{}.csv'.format(year)
    )
    ferc_final_trade["export_name"] = ferc_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"]
    )

    # Develop the final dataframe for US-level aggregation
    us_import_grouped_tot = df_final_trade_out_filt_melted_merge['value'].sum()
    us_final_trade = df_final_trade_out_filt_melted_merge.copy()
    us_final_trade = us_final_trade.groupby(
        ['export BAA']
    )['value'].sum().reset_index()
    us_final_trade["fraction"] = (
        us_final_trade["value"] / us_import_grouped_tot
    )
    us_final_trade = us_final_trade.fillna(value=0)
    us_final_trade = us_final_trade.drop(columns=["value"])
    us_final_trade["export_name"] = us_final_trade["export BAA"].map(
        df_BA_NA[["BA_Acronym", "BA_Name"]].set_index("BA_Acronym")["BA_Name"]
    )
    return {
        'BA': BAA_final_trade,
        'FERC': ferc_final_trade,
        'US': us_final_trade,
    }
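# Example usage (a sketch; assumes the EBA bulk file and the BA code
# spreadsheet are present under data_dir, and that model_specs supplies
# defaults when arguments are omitted):
#
#     trade = ba_io_trading_model(year=2016, subregion='BA')
#     ba_mix = trade['BA']      # export BAA, import BAA, fraction, names
#     ferc_mix = trade['FERC']  # FERC-region consumption-mix fractions
#     us_mix = trade['US']      # national consumption-mix fractions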