def Construction(calculation_years=range(2010, 2017)):
    """
    Estimate county-level construction energy use and save it as parquet.

    State-level diesel, natural gas, electricity, and LPG use are
    calculated from Economic Census data, allocated to counties using
    County Business Patterns establishment fractions, and scaled with the
    BEA multiplier returned by ``cons.calc_bea_multiplier``.

    Parameters
    ----------
    calculation_years : iterable of int
        Years forwarded to ``cons.calc_county_energy``. Defaults to
        2010 through 2016.

    Returns
    -------
    None
        The result is written to
        '../results/cons_county_energy_<YYYYMMDD_HHMM>.parquet.gzip'.
    """
    # import 2012 Economic Census data.
    census_data = pd.concat(
        [cons.census(naics) for naics in [23, 236, 237, 238]],
        ignore_index=True
        )

    # Fill in missing values. The replacements are all computed from the
    # unmodified census data before the original rows are removed.
    fill_states = ['DE', 'DC', 'WV']

    filled = [
        cons.fill_in_missing_data(census_data, state) for state in fill_states
        ]

    census_data = census_data[~census_data.state_abbr.isin(fill_states)]

    census_data = pd.concat([census_data] + filled)

    census_data = census_data.set_index('state').sort_index().reset_index()

    census_data = census_data.apply(pd.to_numeric, errors='ignore')

    # Calculate state-level energy use (all in MMBtu)
    # Diesel use
    diesel_state = cons.calc_diesel_state(census_data)

    # Natural gas use
    ng_state = cons.calc_ng_state(census_data)

    # Electricity use
    elect_state = cons.calc_elec_state(census_data)

    # Liquid petroleum gas use
    lpg_state = cons.calc_lpg_state(census_data)

    energy_state = pd.concat(
        [diesel_state, ng_state, elect_state, lpg_state],
        axis=0, ignore_index=True
        )

    energy_state = cons.format_state_energy(energy_state)

    # Calculate GDP multiplier
    multiplier = cons.calc_bea_multiplier()

    cbp_2012 = get_cbp.CBP(2012).cbp

    # Calculate county fraction of state construction establishments by
    # NAICS code.
    county_frac = cons.calc_county_fraction(cbp_2012)

    county_frac.rename(columns={'naics': 'NAICS'}, inplace=True)

    # Calculate county energy. The caller's years are passed through;
    # previously a hard-coded range(2010, 2017) was used here, so the
    # calculation_years argument had no effect.
    cons_energy = cons.calc_county_energy(
        energy_state, county_frac, multiplier,
        calculation_years=calculation_years
        )

    # remove sector total (NAICS == 23) and reset index
    cons_energy = cons_energy[cons_energy.NAICS != 23].reset_index()

    # One dask partition per distinct fipstate value.
    cons_energy = dd.from_pandas(
        cons_energy.set_index('fipstate'),
        npartitions=len(cons_energy.fipstate.unique())
        )

    filename = 'cons_county_energy_' + \
        dt.datetime.now().strftime('%Y%m%d_%H%M') + '.parquet.gzip'

    cons_energy.to_parquet('../results/' + filename, compression='gzip',
                           engine='pyarrow')
import Match_GHGRP_County_IPH as county_matching
import get_cbp
import pandas as pd
import datetime as dt

# Module-level setup: everything below runs at import time and reads the
# GHGRP parquet file and the IPF results CSV from relative paths.

# Timestamp string formatted as YYYYMMDD-HHMM, taken once when this runs.
today = dt.datetime.now().strftime('%Y%m%d-%H%M')

# Import GHGRP energy data
energy_ghgrp = pd.read_parquet('../results/ghgrp_energy_20200826-1725.parquet',
                               engine='pyarrow')

cbp = get_cbp.CBP(2014)  # Import county business patterns data

tcm = county_matching.County_matching(2014)  # Instantiate matching methods

# Match GHGRP facilities to their county
ghgrp_matching = tcm.format_ghgrp(energy_ghgrp, cbp.cbp_matching)

# Overwrites cbp.cbp_matching with the output of ghgrp_counts.
cbp.cbp_matching = tcm.ghgrp_counts(cbp.cbp_matching, ghgrp_matching)

# Adjust the CBP establishment counts based on GHGRP facilities
cbp_corrected = tcm.correct_cbp(cbp.cbp_matching)

# Import results of IPF algorithm applied to 2014 MECS.
# The first CSV column is used as the index.
ipf_results_formatted = pd.read_csv(
    './calculation_data/mecs_2014_ipf_results_naics_employment.csv',
    index_col=0
    )


def calculate_net_electricity(cbp_matching, ipf_results_formatted):
    """
    Estimates net electricity by county, industry, and employment size
    class for all establishments. Applies net electricity intensities
    calculated from MECS to all establishments, including GHGRP facilities,
    unlike combustion emissions estimates.
import pandas as pd
import Match_GHGRP_County_IPH as county_matching
import get_cbp
import Calculate_MfgEnergy_IPH

# Module-level setup: everything below runs at import time and reads the
# GHGRP parquet file and the NAICS mapping CSV from relative paths.

# Code from run_all_IPH.py. Used to calculate MECS intensities based on
# CBP establishment counts.
energy_ghgrp = pd.read_parquet('../results/ghgrp_energy_20190801-2337.parquet',
                               engine='pyarrow')

cbp = get_cbp.CBP(2014)

tcm = county_matching.County_matching(2014)

ghgrp_matching = tcm.format_ghgrp(energy_ghgrp, cbp.cbp_matching)

# Overwrites cbp.cbp_matching with the output of ghgrp_counts.
cbp.cbp_matching = tcm.ghgrp_counts(cbp.cbp_matching, ghgrp_matching)

# NOTE(review): cbp_corrected is not used by the statements visible in
# this fragment; confirm whether later code relies on it.
cbp_corrected = tcm.correct_cbp(cbp.cbp_matching)

tcmfg = Calculate_MfgEnergy_IPH.Manufacturing_energy(2014, energy_ghgrp)

tcmfg.update_naics(ghgrp_matching)

mecs_intensities = tcmfg.calc_intensities(cbp.cbp_matching)

# Keep only the rows whose MECS_FT is 'Net_electricity'.
mecs_intensities = \
    mecs_intensities[mecs_intensities.MECS_FT == 'Net_electricity']

# mecs_elec contains 'dummy' NAICS codes. Need to convert.
# (The frame holding these rows here is mecs_intensities.)
naics_mappings = pd.read_csv('./calculation_data/mecs_naics_2012.csv',
                             usecols=['MECS_NAICS_dummies', 'MECS_NAICS'])

# Left merge keeps every intensity row; rows with no matching
# MECS_NAICS_dummies value get a missing MECS_NAICS.
mecs_intensities = pd.merge(mecs_intensities, naics_mappings,
                            on='MECS_NAICS_dummies', how='left')

# Calculate establishment counts from county-level energy data, as
# this represents processed CBP and GHGRP data.
def calc_ghgrp_intensities(self):
    """
    Calculate GHGRP CO2e intensities and fuel-disaggregation fractions.

    NAICS codes of reported GHGRP data may be corrected based on Census
    County Business Patterns data. Final GHGRP GHG intensity and fuel
    disaggregation are based on these corrected NAICS codes.
    GHG intensity is calculated by county, NAICS, and MECS_FT_byp.
    Fuel disaggregation and intensity includes end use.

    Reads ``self.year``, ``self.county_data_file``, ``self.std_efs`` and
    ``self.data_dir``. Writes 'ghgrp_fuel_disagg_<year>.csv' and
    'ghgrp_CO2e_intensity_<year>.csv' into ``self.data_dir``.

    Returns
    -------
    final_ghgrp_CO2e_intensity : pandas.DataFrame
        MMBtu-weighted average MTCO2e_per_MMBtu by REPORTING_YEAR,
        COUNTY_FIPS, naics, MECS_FT and MECS_FT_byp, after QA/QC against
        ``self.std_efs`` and with Biomass set to zero.
    final_ghgrp_fuel_disagg : pandas.DataFrame
        Column 'MMBtu_fraction' indexed by county, naics, fuel type and
        end use.
    """
    if self.year > 2012:
        naics_column = 'PRIMARY_NAICS_CODE_12'
    else:
        naics_column = 'PRIMARY_NAICS_CODE'

    # Grouping keys shared by the facility-level emission and energy sums.
    facility_fuel_keys = [
        'FACILITY_ID', 'REPORTING_YEAR', 'MECS_FT', 'MECS_FT_byp'
        ]

    # This is an updated ghgrp energy file. Bug was fixed on 5/5/2020 that
    # didn't capture MTCO2e_TOTAL values. Energy values are the same
    # as the original calculations.
    ghgrp_energy = pd.read_parquet(
        '../results/ghgrp_energy_20200826-1725.parquet', engine='pyarrow',
        columns=['FACILITY_ID', 'REPORTING_YEAR', 'FUEL_TYPE',
                 'FUEL_TYPE_OTHER', 'FUEL_TYPE_BLEND', 'COUNTY_FIPS',
                 'MECS_Region', 'MTCO2e_TOTAL', 'PRIMARY_NAICS_CODE',
                 'SECONDARY_NAICS_CODE', 'MMBtu_TOTAL']
        )

    # Drop entries with zero calculated MMBtu and keep only self.year.
    # A boolean mask is used instead of .loc[<filtered>.index], which
    # would duplicate rows whenever index labels repeat.
    ghgrp_energy = ghgrp_energy[
        (ghgrp_energy.MMBtu_TOTAL != 0) &
        (ghgrp_energy.REPORTING_YEAR == self.year)
        ].copy()

    # NOTE(review): 2014 is hard-coded here and for CBP/County_matching
    # below, independent of self.year -- confirm this is intended.
    of = breakout_other_fuels.Other_fuels(2014)

    # Map aggregated fuel types to GHGRP fuel types
    ghgrp_energy = of.map_GHGRP_fueltypes(ghgrp_energy, 'MECS_FT_IPF.csv')

    # Map disaggregated fuel types to GHGRP fuel type
    ghgrp_energy = of.map_GHGRP_fueltypes(ghgrp_energy, 'MECS_FT_byp.csv')

    # Replace Biomass emissions with zero value
    ghgrp_energy.loc[ghgrp_energy.MECS_FT_byp == 'Biomass',
                     'MTCO2e_TOTAL'] = 0

    # Sum emissions (MTCO2e) for specified year(s)
    ghgrp_ffc_emissions = ghgrp_energy.groupby(
        facility_fuel_keys
        ).MTCO2e_TOTAL.sum().dropna()

    # Calculate CO2e intensity (MTCO2e/MMBtu)
    ghgrp_CO2e_intensity = ghgrp_ffc_emissions.divide(
        ghgrp_energy.groupby(facility_fuel_keys).MMBtu_TOTAL.sum().dropna()
        )

    ghgrp_CO2e_intensity.name = 'MTCO2e_per_MMBtu'

    ghgrp_CO2e_intensity = pd.DataFrame(ghgrp_CO2e_intensity)

    cbp = get_cbp.CBP(2014)

    tcm = county_matching.County_matching(2014)

    ghgrp_matching = tcm.format_ghgrp(ghgrp_energy, cbp.cbp_matching)

    # Update NAICS codes based on Census Business Patterns Data
    # NOTE(review): when naics_column is 'PRIMARY_NAICS_CODE' both merge
    # inputs may carry that column, which would yield suffixed names --
    # verify against the output of map_GHGRP_fueltypes / format_ghgrp.
    energy_ghgrp_matched = pd.merge(
        ghgrp_energy, ghgrp_matching[['FACILITY_ID', naics_column]],
        on='FACILITY_ID', how='left'
        )

    energy_ghgrp_matched[naics_column] = \
        energy_ghgrp_matched[naics_column].astype('int')

    unique_naics = energy_ghgrp_matched[naics_column].unique()

    naics6d = pd.DataFrame(unique_naics, columns=[naics_column],
                           index=range(0, len(unique_naics)))

    naics6d = Match_MECS_NAICS.Match(naics6d, naics_column,
                                     naics_vintage=2012)

    energy_ghgrp_matched = pd.merge(energy_ghgrp_matched, naics6d,
                                    on=naics_column, how='left')

    # Filter out facilities that use PRIMARY_NAICS_CODE == 486210 and
    # NAICS_USED == 0
    energy_ghgrp_matched = energy_ghgrp_matched[
        (energy_ghgrp_matched[naics_column] != 486210) &
        (energy_ghgrp_matched.MECS_NAICS != 0)
        ]

    if naics_column == 'PRIMARY_NAICS_CODE_12':
        # Replace the reported NAICS code with the corrected one.
        energy_ghgrp_matched = energy_ghgrp_matched.drop(
            'PRIMARY_NAICS_CODE', axis=1
            ).rename(columns={'PRIMARY_NAICS_CODE_12': 'PRIMARY_NAICS_CODE'})

    energy_ghgrp_y = energy_ghgrp_matched.groupby(
        ['REPORTING_YEAR', 'FACILITY_ID', 'MECS_Region', 'COUNTY_FIPS',
         'PRIMARY_NAICS_CODE', 'MECS_NAICS', 'MECS_FT', 'MECS_FT_byp'],
        as_index=False
        ).MMBtu_TOTAL.sum()

    energy_ghgrp_y['COUNTY_FIPS'] = energy_ghgrp_y.COUNTY_FIPS.astype(int)

    energy_ghgrp_y.rename(columns={'PRIMARY_NAICS_CODE': 'naics'},
                          inplace=True)

    # Share of each MECS_FT_byp within its (county, naics, MECS_FT) total.
    # groupby(level=...) replaces Series.sum(level=...), which was removed
    # in pandas 2.0.
    ghgrp_byp = energy_ghgrp_y.groupby(
        ['COUNTY_FIPS', 'naics', 'MECS_FT', 'MECS_FT_byp']
        ).MMBtu_TOTAL.sum()

    ghgrp_byp = pd.DataFrame(
        ghgrp_byp.divide(ghgrp_byp.groupby(level=[0, 1, 2]).sum())
        )

    county_data = pd.read_parquet(self.county_data_file)

    county_data = county_data.groupby(
        ['data_source', 'COUNTY_FIPS', 'naics', 'Emp_Size', 'End_use',
         'MECS_FT'], as_index=False
        ).MMBtu.sum()

    # Share of each end use within its (county, naics, MECS_FT) total,
    # for GHGRP-sourced records only.
    final_ghgrp_fuel_disagg = \
        county_data[county_data.data_source == 'ghgrp'].groupby(
            ['COUNTY_FIPS', 'naics', 'MECS_FT', 'End_use']
            ).MMBtu.sum()

    final_ghgrp_fuel_disagg = final_ghgrp_fuel_disagg.divide(
        final_ghgrp_fuel_disagg.groupby(level=[0, 1, 2]).sum()
        )

    final_ghgrp_fuel_disagg = pd.DataFrame(
        final_ghgrp_fuel_disagg.multiply(ghgrp_byp.MMBtu_TOTAL)
        )

    final_ghgrp_fuel_disagg.rename(columns={0: 'MMBtu_fraction'},
                                   inplace=True)

    final_ghgrp_fuel_disagg.dropna(inplace=True)

    final_ghgrp_fuel_disagg.to_csv(
        os.path.join(self.data_dir,
                     'ghgrp_fuel_disagg_' + str(self.year) + '.csv'),
        index=True
        )

    final_ghgrp_CO2e_intensity = pd.merge(
        energy_ghgrp_y.set_index(facility_fuel_keys),
        ghgrp_CO2e_intensity, left_index=True, right_index=True,
        how='left'
        )

    # Remove MMBtu_TOTAL values of Zero
    final_ghgrp_CO2e_intensity = final_ghgrp_CO2e_intensity[
        final_ghgrp_CO2e_intensity.MMBtu_TOTAL != 0
        ]

    # Created weighted average CO2e intensity by county and naics
    final_ghgrp_CO2e_intensity = pd.DataFrame(
        final_ghgrp_CO2e_intensity.groupby(
            ['REPORTING_YEAR', 'COUNTY_FIPS', 'naics', 'MECS_FT',
             'MECS_FT_byp']
            ).apply(lambda x: np.average(x.MTCO2e_per_MMBtu,
                                         weights=x.MMBtu_TOTAL))
        )

    final_ghgrp_CO2e_intensity.rename(columns={0: 'MTCO2e_per_MMBtu'},
                                      inplace=True)

    # Do a quick QA/QC on average emission factors of standard fuel types
    # If weighted average is +/- 20%, use EPA standard value.
    def calc_ef_range(x, plusminus=0.2):
        """
        Return the row's emission factor if it lies within +/- plusminus
        of the standard factor; otherwise return the standard factor.
        """
        # The first test is on MECS_FT. It previously tested MECS_FT_byp,
        # which made the final branch unreachable and compared every
        # 'Other' byproduct fuel against the first 'Other' standard row.
        if x['MECS_FT'] != 'Other':
            std_ef = self.std_efs[
                self.std_efs.MECS_FT == x['MECS_FT']
                ].MTCO2e_per_MMBtu.values[0]

        elif x['MECS_FT_byp'] == 'Other':
            # No standard factor to compare an unspecified fuel against.
            return x['MTCO2e_per_MMBtu']

        else:
            std_match = self.std_efs[
                (self.std_efs.MECS_FT == x['MECS_FT']) &
                (self.std_efs.MECS_FT_byp == x['MECS_FT_byp'])
                ].MTCO2e_per_MMBtu.values

            # Keep the calculated value when no standard factor is listed
            # for this byproduct fuel, rather than failing.
            if len(std_match) == 0:
                return x['MTCO2e_per_MMBtu']

            std_ef = std_match[0]

        ef_range = [std_ef * (1 - plusminus), std_ef * (1 + plusminus)]

        if ef_range[0] <= x['MTCO2e_per_MMBtu'] <= ef_range[1]:
            return x['MTCO2e_per_MMBtu']

        return std_ef

    final_ghgrp_CO2e_intensity.reset_index(inplace=True)

    if not final_ghgrp_CO2e_intensity.empty:
        qaqc_efs = final_ghgrp_CO2e_intensity.apply(calc_ef_range, axis=1)

        # Assign the column directly; an in-place update through attribute
        # access does not reach the frame under copy-on-write. As with
        # Series.update, missing QA/QC results leave the value unchanged.
        final_ghgrp_CO2e_intensity['MTCO2e_per_MMBtu'] = qaqc_efs.fillna(
            final_ghgrp_CO2e_intensity['MTCO2e_per_MMBtu']
            )

    # fill biomass emission factor = 0
    final_ghgrp_CO2e_intensity.loc[
        final_ghgrp_CO2e_intensity.MECS_FT_byp == 'Biomass',
        'MTCO2e_per_MMBtu'
        ] = 0

    # Save results
    final_ghgrp_CO2e_intensity.to_csv(
        os.path.join(self.data_dir,
                     'ghgrp_CO2e_intensity_' + str(self.year) + '.csv'),
        index=False
        )

    return final_ghgrp_CO2e_intensity, final_ghgrp_fuel_disagg
def Manufacturing(calculation_years=range(2010, 2017)):
    """
    Calculate manufacturing energy use for each requested year.

    For every year, GHGRP facilities are matched to County Business
    Patterns data, combustion and electricity energy are estimated, and
    the yearly results are concatenated into a single dataframe.

    Parameters
    ----------
    calculation_years : iterable of int
        Years to calculate. Any iterable is accepted. Defaults to 2010
        through 2016 (CBP data only through 2016; 2017 scheduled for
        Nov 2019).

    Returns
    -------
    mfg_energy
        Concatenation (``dd.multi.concat``) of the combustion,
        electricity and GHGRP electricity results for all years.

    Raises
    ------
    ValueError
        If ``calculation_years`` is empty.
    """
    # Materialize once so generators and other one-shot iterables work,
    # and fail before any file is read if there is nothing to calculate.
    years = list(calculation_years)

    if not years:
        raise ValueError('calculation_years must contain at least one year')

    energy_ghgrp = pd.read_parquet(
        '../results/ghgrp_energy_20191108-1416.parquet', engine='pyarrow'
        )

    # Accumulated result; None until the first year has been processed.
    # A sentinel replaces the y == calculation_years[0] check, which
    # required an indexable argument and restarted the accumulation if
    # the first year appeared again later in the sequence.
    mfg_energy = None

    for y in years:

        print(y)

        print("getting cbp")
        cbp = get_cbp.CBP(y)

        cm = county_matching.County_matching(y)

        print("matching ghgrp")
        ghgrp_matching = cm.format_ghgrp(energy_ghgrp, cbp.cbp_matching)

        # Instantiate class for a single year
        cmfg = Calculate_MfgEnergy_IEDB.Manufacturing_energy(y, energy_ghgrp)

        # update NAICS codes for energy_ghgrp based on ghgrp_matching
        print("updating naics")
        cmfg.update_naics(ghgrp_matching)

        # Separate process for combustion fuels
        print("counting ghgrps")
        cbp_matching_counts = cm.ghgrp_counts(cbp.cbp_matching,
                                              ghgrp_matching)

        print("correcting naics")
        cbp_corrected = cm.correct_cbp(cbp_matching_counts)

        print("cbp_matching cols, ", cbp.cbp_matching.columns)

        # Export cbp_corrected for 2012 for mining
        if y == 2012:

            cbp_corrected[(cbp_corrected.naics > 210000) &
                          (cbp_corrected.naics < 220000)][
                ['fipstate', 'fipscty', 'naics', 'est', 'COUNTY_FIPS']
                ].to_csv(
                    '../calculation_data/cbp_corrected_mining.csv'
                    )

        # Run IPF only for MECS years, 2010 and 2014
        if y in (2010, 2014):

            seed_methods = ipf_seed.IPF_seed(year=y)

            seed_df = seed_methods.create_seed(cbp_matching_counts)

            ipf_methods = ipf.IPF(y, table3_2=seed_methods.table3_2,
                                  table3_3=seed_methods.table3_3)

            # Run IPF. Saves resulting energy values as csv
            ipf_methods.mecs_ipf(seed_df)

            mecs_intensities = cmfg.calc_intensities(cbp_matching_counts)

            mecs_intensities.to_pickle('mecs_intensities.pkl')

        else:
            # Other years reuse whatever intensities were pickled last,
            # so the file must already exist in the working directory.
            mecs_intensities = pd.read_pickle('mecs_intensities.pkl')

        # Calculates non-ghgrp combustion energy use and combines with
        # ghgrp energy use. Distinguishes between data sources with
        # 'data_source' column.
        # This is a dask dataframe partitioned by STATE
        print("calculating combustion")
        mfg_comb_energy = cmfg.combfuel_calc(cbp_corrected, mecs_intensities)

        mfg_comb_energy['year'] = y

        # EIA electricity data; dask dataframe, partitioned by STATE
        print("calculating electricity")
        ghgrp_electricity, elect_fac_ids = cmfg.GHGRP_electricity_calc()

        # GHGRP matching for EIA electricity data
        ghgrp_matching_923 = pd.DataFrame(
            ghgrp_matching[ghgrp_matching.FACILITY_ID.isin(elect_fac_ids)]
            )

        print('cbp_matching: ', cbp.cbp_matching.columns)

        cbp_matching_923 = cm.ghgrp_counts(cbp.cbp_matching,
                                           ghgrp_matching_923)

        cbp_corrected_923 = cm.correct_cbp(cbp_matching_923)

        # estimate non-ghgrp electricity use. Dask dataframe partitioned
        # by STATE
        mfg_elect_energy = cmfg.electricity_calc(cbp_corrected_923,
                                                 mecs_intensities)

        mfg_elect_energy['year'] = y

        # Prepend the running result (if any) so every year is kept.
        year_frames = [mfg_comb_energy, mfg_elect_energy, ghgrp_electricity]

        if mfg_energy is not None:
            year_frames.insert(0, mfg_energy)

        mfg_energy = dd.multi.concat(
            year_frames, axis=0, join='outer', interleave_partitions=True
            )

    return mfg_energy