import os
import datetime as dt
from functools import reduce

import numpy as np
import pandas as pd
from sklearn import linear_model

# Project-internal imports (module paths assumed from the
# EnergyIntensityIndicators package layout)
from EnergyIntensityIndicators.pull_eia_api import GetEIAData
from EnergyIntensityIndicators.residential_floorspace import ResidentialFloorspace
from EnergyIntensityIndicators.LMDI import CalculateLMDI


class WeatherFactors:
    def __init__(self, sector, directory, activity_data=None,
                 residential_floorspace=None, nominal_energy_intensity=None,
                 end_year=2018):
        self.end_year = end_year
        self.directory = directory
        self.sector = sector
        self.activity_data = activity_data
        self.nominal_energy_intensity = nominal_energy_intensity
        self.residential_floorspace = residential_floorspace
        self.eia_data = GetEIAData(self.sector)
        self.lmdi_prices = pd.read_excel(
            f'{self.directory}/EnergyPrices_by_Sector_010820_DBB.xlsx',
            sheet_name='LMDI-Prices', header=14, usecols='A:B, EY')
        self.regions_subregions = [
            'northeast', 'new_england', 'middle_atlantic', 'midwest',
            'east_north_central', 'west_north_central', 'south',
            'south_atlantic', 'east_south_central', 'west_south_central',
            'west', 'mountain', 'pacific']
        self.sub_regions_dict = {
            'northeast': ['New England', 'Middle Atlantic'],
            'midwest': ['East North Central', 'West North Central'],
            'south': ['South Atlantic', 'East South Central',
                      'West South Central'],
            'west': ['Mountain', 'Pacific']}

    @staticmethod
    def adjust_data(subregions, hdd_by_division, hdd_activity_weights,
                    cooling=True, cdd_by_division=None,
                    cdd_activity_weights=None, use_weights_1961_90=True):
        """Calculate weights for the adjusted weather factors prediction.
        """
        years_1961_90 = list(range(1961, 1990 + 1))
        # Original read range(1981, 1990 + 1), which contradicts the
        # variable name and the 1981-2010 averages computed below
        years_1981_2010 = list(range(1981, 2010 + 1))

        if cooling:
            cdd_by_division = cdd_by_division.set_index('Year')
            cdd_by_division.index = cdd_by_division.index.astype(int)
            averages_1961_90_cooling = \
                cdd_by_division.loc[years_1961_90, :].mean(axis=0)
            averages_1981_2010_cooling = \
                cdd_by_division.loc[years_1981_2010, :].mean(axis=0)

        hdd_by_division = hdd_by_division.set_index('Year')
        hdd_by_division.index = hdd_by_division.index.astype(int)
        averages_1961_90_heating = \
            hdd_by_division.loc[years_1961_90, :].mean(axis=0)
        averages_1981_2010_heating = \
            hdd_by_division.loc[years_1981_2010, :].mean(axis=0)

        all_s_weights_heating = []
        all_s_weights_cooling = []

        for s in subregions:
            if use_weights_1961_90:
                subregion_weights_heating = \
                    averages_1961_90_heating.loc[s] * hdd_activity_weights[s]
                if cooling:
                    subregion_weights_cooling = \
                        averages_1961_90_cooling.loc[s] * \
                        cdd_activity_weights[s]
                    all_s_weights_cooling.append(subregion_weights_cooling)
            else:
                subregion_weights_heating = \
                    averages_1981_2010_heating.loc[s] * \
                    hdd_activity_weights[s]
                if cooling:
                    subregion_weights_cooling = \
                        averages_1981_2010_cooling.loc[s] * \
                        cdd_activity_weights[s]
                    all_s_weights_cooling.append(subregion_weights_cooling)

            all_s_weights_heating.append(subregion_weights_heating)

        weights_dict = dict()
        if cooling:
            weights_dict['cooling'] = sum(all_s_weights_cooling)

        weights_dict['heating'] = sum(all_s_weights_heating)
        return weights_dict

    def process_prices(self, weather_factors_df):
        """TODO: Are distributed lag and time cubed ever the desired
        variable? Does this method need to exist?
        """
        lmdi_prices = self.lmdi_prices
        # distributed_lag =
        # time_cubed =
        selected_variable = [1] * len(weather_factors_df)
        return selected_variable

    @staticmethod
    def cbecs_1995_shares():
        """Calculate fuels and electricity shares for the commercial sector
        from CBECS 1995 data.
        """
        electricity_consumption_tbtu = \
            {'Northeast': 436, 'Midwest': 558, 'South': 1027, 'West': 587}
        electricity_consumption_tbtu['Total'] = \
            sum(electricity_consumption_tbtu.values())
        electricity_df = \
            pd.DataFrame.from_dict(electricity_consumption_tbtu,
                                   orient='index',
                                   columns=['electricity_consumption_tbtu'])

        energy_tbtu = \
            {'Northeast': 1035, 'Midwest': 1497, 'South': 1684, 'West': 1106}
        energy_tbtu['Total'] = sum(energy_tbtu.values())
        energy_df = pd.DataFrame.from_dict(energy_tbtu, orient='index',
                                           columns=['energy'])

        shares_df = energy_df.merge(electricity_df, left_index=True,
                                    right_index=True, how='outer')
        shares_df['elec_share'] = \
            shares_df.electricity_consumption_tbtu.divide(
                shares_df.loc['Total', 'electricity_consumption_tbtu'])
        shares_df['fuel_consumption'] = \
            shares_df.energy.subtract(shares_df.electricity_consumption_tbtu)
        shares_df['fuels_share'] = \
            shares_df.fuel_consumption.divide(
                shares_df.loc['Total', 'fuel_consumption'])
        return shares_df

    @staticmethod
    def recs_1993_shares():
        """Calculate fuels and electricity shares for the residential sector
        from RECS 1993 data.
        """
        electricity_consumption_tbtu = \
            {'Northeast': 470, 'Midwest': 740, 'South': 1510, 'West': 560}
        electricity_consumption_tbtu['Total'] = \
            sum(electricity_consumption_tbtu.values())
        electricity_df = \
            pd.DataFrame.from_dict(electricity_consumption_tbtu,
                                   orient='index',
                                   columns=['electricity_consumption_tbtu'])

        energy_tbtu = \
            {'Northeast': 2380, 'Midwest': 3130, 'South': 2950, 'West': 1550}
        energy_tbtu['Total'] = sum(energy_tbtu.values())
        energy_df = pd.DataFrame.from_dict(energy_tbtu, orient='index',
                                           columns=['energy'])

        shares_df = energy_df.merge(electricity_df, left_index=True,
                                    right_index=True, how='outer')
        shares_df['elec_share'] = \
            shares_df.electricity_consumption_tbtu.divide(
                shares_df.loc['Total', 'electricity_consumption_tbtu'])
        shares_df['fuel_consumption'] = \
            shares_df.energy.subtract(shares_df.electricity_consumption_tbtu)
        shares_df['fuels_share'] = \
            shares_df.fuel_consumption.divide(
                shares_df.loc['Total', 'fuel_consumption'])
        return shares_df

    def regional_shares(self, dataframe, cols):
        """Calculate shares of regional totals by subregion.
        """
        dataframe = dataframe.set_index('regions_subregions')
        weights_data = dict()
        for col in cols:
            shares_dict = dict()
            for r_, subregions in self.sub_regions_dict.items():
                subregions = [s.lower().replace(' ', '_') for s in subregions]
                regions_ = subregions + [r_]
                region_total = dataframe.loc[r_, col]
                for r in regions_:
                    share_value = dataframe.loc[r, col] / region_total
                    shares_dict[r] = share_value
            weights_data[col] = shares_dict
        return weights_data

    def gather_weights_data(self):
        """Calculate weights to aggregate subregions into four regions.
        """
        if self.sector == 'residential':
            electricity_data = {
                'total_elec_tbtu':
                    {'northeast': 470, 'midwest': 740, 'south': 1510,
                     'west': 560},
                'heating_tbtu':
                    {'northeast': 12 * 3.412, 'midwest': 22 * 3.412,
                     'south': 61 * 3.412, 'west': 25 * 3.412},
                'cooling_tbtu':
                    {'northeast': 40, 'midwest': 80, 'south': 310,
                     'west': 30}}
            fuels_data = {
                'all_energy_tbtu':
                    {'northeast': 2380, 'midwest': 3130, 'south': 2950,
                     'west': 1550},
                'electricity_tbtu':
                    {'northeast': 470, 'midwest': 740, 'south': 1510,
                     'west': 560},
                'heating_all_energy_tbtu':
                    {'northeast': 1490, 'midwest': 1920, 'south': 1210,
                     'west': 700}}

            # Residential Heating Households, Millions
            heating_activity = \
                [4.1, 1, 3.1, 5.8, 3.5, 2.4, 18.8, 10.7, 3.4, 4.8, 8.3, 2,
                 6.3]
            # Residential Cooling Households, Millions
            cooling_activity = \
                [10.9, 2.1, 8.8, 16.4, 10.8, 5.6, 29.4, 15, 5.3, 9.2, 7.1,
                 2.1, 5.1]
            all_energy = \
                [19.1, 4.9, 14.2, 23.2, 16.3, 6.9, 32.8, 16.8, 5.9, 10.1,
                 19.4, 5.3, 14.1]
            electricity = \
                [1.9, 0.5, 1.4, 2.9, 1.6, 1.3, 14.6, 8.7, 2.5, 3.4, 5.6,
                 1.4, 4.2]

        elif self.sector == 'commercial':
            electricity_data = {
                'total_elec_tbtu':
                    {'northeast': 436, 'midwest': 558, 'south': 1027,
                     'west': 587},
                'heating_tbtu':
                    {'northeast': 18, 'midwest': 23, 'south': 43,
                     'west': 28},
                'cooling_tbtu':
                    {'northeast': 44, 'midwest': 60, 'south': 172,
                     'west': 64}}
            fuels_data = {
                'all_energy_tbtu':
                    {'northeast': 1035, 'midwest': 1497, 'south': 1684,
                     'west': 1106},
                'electricity_tbtu':
                    {'northeast': 436, 'midwest': 558, 'south': 1027,
                     'west': 587},
                'heating_all_energy_tbtu':
                    {'northeast': 385, 'midwest': 668, 'south': 376,
                     'west': 275}}

            # Commercial Heating Floorspace, Million SF
            heating_activity = \
                [657, 137, 520, 779, 345, 434, 3189, 1648, 1140, 401, 1219,
                 469, 750]
            # Commercial Cooling Floorspace, Million SF
            cooling_activity = \
                [5919, 1472, 4447, 10860, 7301, 3559, 13666, 6512, 3265,
                 3889, 7058, 2812, 4246]
            all_energy = \
                [7661, 2031, 5630, 10860, 7301, 3559, 13666, 6512, 3265,
                 3889, 7065, 2819, 4246]
            electricity = \
                [657, 137, 520, 779, 345, 434, 3189, 1648, 1140, 401, 1219,
                 469, 750]
        else:
            return None

        weights_data_ = {'regions_subregions': self.regions_subregions,
                         'heating_activity': heating_activity,
                         'cooling_activity': cooling_activity,
                         'all_energy': all_energy,
                         'electricity': electricity}
        weights_df = pd.DataFrame(data=weights_data_)
        weights_df['fuels'] = \
            weights_df['all_energy'].subtract(weights_df['electricity'])
        return weights_df

    def heating_cooling_data(self):
        hdd_by_division_historical = \
            pd.read_csv(
                './Data/historical_hdd_census_division.csv').set_index('Year')
        cdd_by_division_historical = \
            pd.read_csv(
                './Data/historical_cdd_census_division.csv').set_index('Year')

        hdd_by_division = \
            self.eia_data.eia_api(id_='1566347', id_type='category')
        hdd_to_drop = [c for c in list(hdd_by_division.columns)
                       if 'Monthly' in c]
        hdd_by_division = hdd_by_division.drop(hdd_to_drop, axis=1)
        hdd_rename_dict = {c: c.replace(', Annual, Number', '')
                           for c in list(hdd_by_division.columns)}
        hdd_by_division = hdd_by_division.rename(columns=hdd_rename_dict)
        hdd_by_division = \
            pd.concat([hdd_by_division_historical, hdd_by_division],
                      sort=True)

        cdd_by_division = \
            self.eia_data.eia_api(id_='1566348', id_type='category')
        cdd_to_drop = [c for c in list(cdd_by_division.columns)
                       if 'Monthly' in c]
        cdd_by_division = cdd_by_division.drop(cdd_to_drop, axis=1)
        cdd_rename_dict = {c: c.replace(', Annual, Number', '')
                           for c in list(cdd_by_division.columns)}
        cdd_by_division = cdd_by_division.rename(columns=cdd_rename_dict)
        cdd_by_division = \
            pd.concat([cdd_by_division_historical, cdd_by_division],
                      sort=True)

        title_case_regions = [s.replace('_', ' ').title()
                              for s in self.regions_subregions]
        hdd_names = [f'Heating Degree-Days, {r}' for r in title_case_regions]
        cdd_names = [f'Cooling Degree-Days, {r}' for r in title_case_regions]
        hdd_new_names_dict = {name: name_title for name, name_title
                              in zip(hdd_names, title_case_regions)}
        cdd_new_names_dict = {name: name_title for name, name_title
                              in zip(cdd_names, title_case_regions)}
        hdd_by_division = hdd_by_division.rename(columns=hdd_new_names_dict)
        cdd_by_division = cdd_by_division.rename(columns=cdd_new_names_dict)
        return hdd_by_division, cdd_by_division
    def estimate_regional_shares(self):
        """Spreadsheet equivalent: Commercial --> 'Regional Shares'.
        Assumes commercial floorspace in each region follows the same
        trends as population or housing units.
        """
        regions = ['Northeast', 'Midwest', 'South', 'West']
        cbecs_data = \
            pd.read_csv('./Data/cbecs_data_millionsf.csv').set_index('Year')
        cbecs_data.index = cbecs_data.index.astype(str)
        cbecs_years = list(cbecs_data.index)
        cbecs_data = cbecs_data.rename(columns={'Midwest ': 'Midwest',
                                                ' South': 'South',
                                                ' West': 'West'})
        cbecs_data.loc['1979', regions] = \
            cbecs_data.loc['1983', regions].subtract([826, 972, 2665, 1212])
        cbecs_data.loc['1979', ['U.S.']] = \
            sum(cbecs_data.loc['1979', regions].values)
        cbecs_data['U.S. (calc)'] = cbecs_data.sum(axis=1)
        comm_regional_shares = \
            cbecs_data.drop(['U.S.', 'U.S. (calc)'], axis=1).divide(
                cbecs_data['U.S. (calc)'].values.reshape(len(cbecs_data), 1))
        comm_regional_shares_ln = np.log(comm_regional_shares)

        # change to pull from residential().activity()
        residential_data = ResidentialFloorspace(end_year=self.end_year)
        final_results_total_floorspace_regions, regional_estimates_all, \
            avg_size_all_regions = \
                residential_data.final_floorspace_estimates()

        regional_dfs = \
            [regional_estimates_all[r][['Total']].rename(
                columns={'Total': r}) for r in regions]
        residential_housing_units = \
            reduce(lambda x, y: pd.merge(x, y, left_index=True,
                                         right_index=True, how='outer'),
                   regional_dfs)
        residential_housing_units['U.S.'] = \
            residential_housing_units.sum(axis=1)
        residential_housing_units.index = \
            residential_housing_units.index.astype(str)
        regional_shares_residential_housing_units = \
            residential_housing_units.drop('U.S.', axis=1).divide(
                residential_housing_units['U.S.'].values.reshape(
                    len(residential_housing_units), 1))
        regional_shares_residential_housing_units_ln = \
            np.log(regional_shares_residential_housing_units)
        regional_shares_residential_housing_units_cbecs_years = \
            regional_shares_residential_housing_units.loc[cbecs_years, :]
        regional_shares_residential_housing_units_cbecs_years_ln = \
            np.log(regional_shares_residential_housing_units_cbecs_years)

        predictions_df = pd.DataFrame(columns=comm_regional_shares.columns,
                                      index=residential_housing_units.index)
        for region in comm_regional_shares.columns:
            x_values = comm_regional_shares_ln[region].values
            X = x_values.transpose()
            y = regional_shares_residential_housing_units_cbecs_years_ln[
                region].values
            p = np.polyfit(X, y, 1)
            predictions_df[region] = \
                np.exp(regional_shares_residential_housing_units_ln[
                    region].multiply(p[0]).add(p[1]))

        predictions_df['Predicted Sum'] = predictions_df.sum(axis=1)
        normalized_shares = \
            predictions_df.drop('Predicted Sum', axis=1).divide(
                predictions_df['Predicted Sum'].values.reshape(
                    len(predictions_df), 1))
        return normalized_shares

    def commercial_estimate_regional_floorspace(self):
        regional_shares = self.estimate_regional_shares()
        commercial_floorspace = self.activity_data

        regional_shares_index = regional_shares.index.astype(str)
        commercial_floorspace_reshape = \
            commercial_floorspace.loc[regional_shares_index, :]

        regional_floorspace = \
            regional_shares.multiply(commercial_floorspace_reshape.values)
        return regional_floorspace

    def commercial_regional_intensity_aggregate(self):
        """Calculate energy intensities (kBtu/sq. ft.) by region and fuel
        type (i.e. fuels and electricity) for use in calculating weather
        factors.

        Returns:
            dictionary with keys 'electricity' and 'fuels' and values:
            dataframes of intensity data for the commercial sector with
            Year index and Region columns
        """
        regional_floorspace = self.commercial_estimate_regional_floorspace()
        total_fuels_to_indicators, elec_to_indicators = \
            self.eia_data.get_seds()

        regional_floorspace_index = regional_floorspace.index
        elec_to_indicators = \
            elec_to_indicators.loc[regional_floorspace_index, :]
        total_fuels_to_indicators = \
            total_fuels_to_indicators.loc[regional_floorspace_index, :]

        fuels_regional = regional_floorspace.multiply(
            total_fuels_to_indicators.drop('National', axis=1).values)
        elec_regional = regional_floorspace.multiply(
            elec_to_indicators.drop('National', axis=1).values)

        return {'fuels': fuels_regional, 'electricity': elec_regional}

    def residential_regional_intensity_aggregate(self):
        """This method does not need to exist if nominal_energy_intensity is
        properly formatted; change the formatting here if it is not.

        Returns:
            dictionary with keys 'electricity' and 'fuels' and values:
            dataframes of intensity data for the residential sector with
            Year index and Region columns,
            i.e. {'fuels': fuels_regional, 'electricity': elec_regional}
        """
        nominal_energy_intensity = self.nominal_energy_intensity
        # nominal_energy_intensity should already be formatted in this way
        return nominal_energy_intensity

    def weather_factors(self, region, energy_type, actual_intensity,
                        weights_df, regional_weights):
        """Estimate a simple regression model to fit the regional intensity
        to a linear function of time (including squared and cubed values of
        time) and degree days.

        - Electricity model: constant term, heating degree days (HDD),
          cooling degree days (CDD), time, time squared, and time cubed
        - Fuels model: constant term, HDD, HDD*Time, time, time squared,
          and a composite fuel price index (the composite fuel price index
          was developed as a weighted average of the national distillate
          fuel oil price index and a national average price for natural
          gas)

        Weather factors are applied at the regional level to generate the
        weather-normalized intensity indexes for each of the four Census
        regions.

        - The weather factors for delivered energy and source energy are
          computed implicitly. For delivered energy, they are calculated as
          the sum of reported electricity and fuels divided by the sum of
          the weather-adjusted electricity and weather-adjusted fuels. A
          similar procedure is followed for source energy. As such, the
          implied weather factors are a result of the process, not an
          independent variable that influences the values of intensity
          indexes for delivered energy and source energy. All of these
          computations occur within the Commercial_Total worksheet.

        TODO: Input data
        """
        if energy_type == 'electricity':
            energy_type = 'elec'

        subregions = self.sub_regions_dict[region]
        subregions_lower = [s.lower().replace(' ', '_') for s in subregions]
        hdd_activity_weights = \
            [regional_weights['heating_activity'][r_]
             for r_ in subregions_lower]
        hdd_activity_weights_dict = \
            {r: regional_weights['heating_activity'][r_]
             for r, r_ in zip(subregions, subregions_lower)}
        cdd_activity_weights = \
            [regional_weights['cooling_activity'][r_]
             for r_ in subregions_lower]
        cdd_activity_weights_dict = \
            {r: regional_weights['cooling_activity'][r_]
             for r, r_ in zip(subregions, subregions_lower)}
        fuels_weights = \
            [regional_weights['fuels'][r_] for r_ in subregions_lower]

        hdd_by_division, cdd_by_division = self.heating_cooling_data()
        heating_degree_days = hdd_by_division[subregions]
        heating_degree_days = heating_degree_days.reset_index('Year')
        heating_degree_days[region] = \
            heating_degree_days[subregions].dot(hdd_activity_weights)

        fuels_heating_degree_days = heating_degree_days
        fuels_heating_degree_days[region] = \
            fuels_heating_degree_days[subregions].dot(fuels_weights)

        weather_factors_df = \
            heating_degree_days[['Year', region]].rename(
                columns={region: 'HDD'})
        weather_factors_df['Year'] = weather_factors_df['Year'].astype(int)
        weather_factors_df['Time'] = weather_factors_df['Year'].values - 1969
        weather_factors_df['Time^2'] = \
            weather_factors_df[['Time']].pow(2).values

        if energy_type == 'elec':
            cooling_degree_days = cdd_by_division[subregions]
            cooling_degree_days[region] = \
                cooling_degree_days[subregions].dot(cdd_activity_weights)
            cooling_degree_days = cooling_degree_days.reset_index('Year')
            cooling_degree_days['Year'] = \
                cooling_degree_days['Year'].astype(int)
            weather_factors_df_cooling = \
                cooling_degree_days[['Year', region]].rename(
                    columns={region: 'CDD'})
            weather_factors_df = \
                weather_factors_df.merge(weather_factors_df_cooling,
                                         on='Year', how='outer')
            weather_factors_df['Time^3'] = \
                weather_factors_df[['Time']].pow(3).values
            weather_factors_df = weather_factors_df.set_index('Year')
            weather_factors_df.index = weather_factors_df.index.astype(int)
            X_data = weather_factors_df[
                ['HDD', 'CDD', 'Time', 'Time^2', 'Time^3']]

        elif energy_type == 'fuels':
            weather_factors_df['HDD*Time'] = \
                heating_degree_days[region].multiply(
                    weather_factors_df['Time'])
            weather_factors_df['Price'] = \
                self.process_prices(weather_factors_df)
            weather_factors_df = weather_factors_df.set_index('Year')
            weather_factors_df.index = weather_factors_df.index.astype(int)
            X_data = weather_factors_df[
                ['HDD', 'HDD*Time', 'Time', 'Time^2', 'Price']]

        # elif self.energy_type == 'delivered':
        #     weather_factor = (reported_electricity + fuels) / \
        #         (weather_adjusted_electricity + weather_adjusted_fuels)
        #     return weather_factor
        else:
            raise KeyError(
                f'Missing valid energy type. Type given: {energy_type}')

        actual_intensity.index = actual_intensity.index.astype(int)

        data = X_data.merge(actual_intensity, left_index=True,
                            right_index=True, how='inner').dropna()
        X = data.drop(region.capitalize(), axis=1)
        Y = data[[region.capitalize()]]

        reg = linear_model.LinearRegression()
        reg.fit(X, Y)
        coefficients = reg.coef_[0]
        intercept = reg.intercept_
        # Predicted value of the intensity based on actual degree days
        predicted_value_intensity_actualdd = reg.predict(X)

        if energy_type == 'elec':
            prediction2_weights = \
                self.adjust_data(
                    subregions=subregions,
                    hdd_by_division=heating_degree_days,
                    cdd_by_division=cooling_degree_days,
                    cdd_activity_weights=cdd_activity_weights_dict,
                    hdd_activity_weights=hdd_activity_weights_dict,
                    use_weights_1961_90=True)
            # Predicted value of the intensity based on the long-term
            # averages of the degree days
            predicted_value_intensity_ltaveragesdd = \
                intercept + \
                coefficients[0] * prediction2_weights['heating'] + \
                coefficients[1] * prediction2_weights['cooling'] + \
                coefficients[2] * data['Time'] + \
                coefficients[3] * data['Time^2'] + \
                coefficients[4] * data['Time^3']
        elif energy_type == 'fuels':
            prediction2_weights = \
                self.adjust_data(
                    subregions=subregions,
                    hdd_by_division=heating_degree_days,
                    hdd_activity_weights=hdd_activity_weights_dict,
                    cooling=False,
                    use_weights_1961_90=True)
            # Predicted value of the intensity based on the long-term
            # averages of the degree days. The HDD*Time regressor here uses
            # the long-term-average HDD; the original expression multiplied
            # coefficients[1] by Time alone, duplicating the Time term.
            predicted_value_intensity_ltaveragesdd = \
                intercept + \
                coefficients[0] * prediction2_weights['heating'] + \
                coefficients[1] * prediction2_weights['heating'] * \
                data['Time'] + \
                coefficients[2] * data['Time'] + \
                coefficients[3] * data['Time^2'] + \
                coefficients[4] * data['Price']

        weather_factor = \
            predicted_value_intensity_actualdd.flatten() / \
            predicted_value_intensity_ltaveragesdd.values.flatten()

        try:
            weather_normalized_intensity = \
                actual_intensity.loc[data.index].divide(
                    weather_factor.reshape(len(weather_factor), 1))
        except Exception:
            try:
                weather_normalized_intensity = \
                    actual_intensity.loc[data.index].divide(weather_factor)
            except Exception as e:
                raise ValueError(
                    f'Failure to divide: {actual_intensity.shape} by '
                    f'{weather_factor.shape}, failed with error {e}')

        weather_factor_df = \
            pd.DataFrame(
                data={'Year': data.index,
                      f'{region}_weather_factor': weather_factor}
            ).set_index('Year')
        return weather_factor_df, weather_normalized_intensity

    def national_method1_fixed_end_use_share_weights(self, energy_type_):
        """Use fixed end-use share weights to develop national factors from
        regional factors, weighted by regional energy shares from the 1995
        CBECS (or the 1993 RECS for the residential sector).
        """
        if self.sector == 'commercial':
            shares = self.cbecs_1995_shares()
            intensity_df = self.commercial_regional_intensity_aggregate()
        elif self.sector == 'residential':
            intensity_df = self.residential_regional_intensity_aggregate()
            shares = self.recs_1993_shares()

        if energy_type_ == 'elec':
            energy_type = 'electricity'
        else:
            energy_type = energy_type_

        regional_weather_factors = []
        weights_df = self.gather_weights_data()
        regional_weights = \
            self.regional_shares(dataframe=weights_df,
                                 cols=['heating_activity',
                                       'cooling_activity', 'fuels'])

        for region in self.sub_regions_dict.keys():
            region_cap = region.capitalize()
            if self.sector == 'residential':
                regional_intensity = intensity_df[region_cap][energy_type_]
            elif self.sector == 'commercial':
                regional_intensity = intensity_df[energy_type_][region_cap]

            weather_factors, weather_normalized_intensity = \
                self.weather_factors(region, energy_type_,
                                     actual_intensity=regional_intensity,
                                     weights_df=weights_df,
                                     regional_weights=regional_weights)
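            # weather_factors() returns both the regional weather-factor
            # series and the weather-normalized intensity; only the factors
            # are carried forward and aggregated to a national factor below.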
            regional_weather_factors.append(weather_factors)

        weather_factors_all = pd.concat(regional_weather_factors, axis=1)
        weather_factors_all = weather_factors_all.reindex(
            columns=list(weather_factors_all.columns) +
            [f'{energy_type_}_weather_factor'])

        if energy_type == 'electricity':
            energy_type = 'elec'
        share_name = f'{energy_type}_share'
        weights = shares[share_name].drop('Total')

        for y in weather_factors_all.index:
            year_weather = \
                weather_factors_all.drop(f'{energy_type_}_weather_factor',
                                         axis=1).loc[y, :]
            year_factor = year_weather.dot(weights.to_numpy())
            weather_factors_all.loc[
                y, [f'{energy_type_}_weather_factor']] = year_factor

        return weather_factors_all

    def national_method2_regression_models(self, seds_data, weather_factors):
        seds_data, weather_factors = \
            CalculateLMDI.ensure_same_indices(seds_data, weather_factors)
        weather_adjusted_consumption = \
            seds_data.drop('National', axis=1).multiply(
                weather_factors.values)
        weather_adjusted_consumption['National'] = \
            weather_adjusted_consumption.sum(axis=1)

        implicit_national_weather_factor = \
            seds_data[['National']].divide(
                weather_adjusted_consumption['National'].values.reshape(
                    len(weather_adjusted_consumption), 1))
        return implicit_national_weather_factor

    def adjust_for_weather(self, data, energy_type):
        """Adjust data by weather factors.

        Parameters
        ----------
        data: dataframe
            dataset to adjust by weather
        energy_type: str
            energy type for which to select weather factors

        Returns
        -------
        weather_adjusted_data: dataframe
        """
        weather_factors = \
            self.national_method1_fixed_end_use_share_weights(energy_type)
        weather_adjusted_data = data / weather_factors[energy_type]
        return weather_adjusted_data

    def get_weather(self, energy_dict=None, energy_type=None, energy_df=None,
                    weather_adjust=False, seds_data=None):
        if self.sector == 'residential':
            if weather_adjust:
                for e_type, energy_dataframe in energy_dict.items():
                    weather_adj_energy = \
                        self.adjust_for_weather(energy_dataframe, e_type)
                    energy_dict[f'{e_type}_weather_adj'] = weather_adj_energy
                return energy_dict
            else:
                weather_factors = dict()
                for e_type in energy_dict.keys():
                    weather_factors_t = \
                        self.national_method1_fixed_end_use_share_weights(
                            energy_type_=e_type)
                    if e_type == 'electricity':
                        e_type = 'elec'
                    weather_factors[e_type] = weather_factors_t
                return weather_factors

        elif self.sector == 'commercial':
            weather_factors = dict()
            for e_type in ['electricity', 'fuels']:
                weather_factors_method1 = \
                    self.national_method1_fixed_end_use_share_weights(e_type)

                early_years = \
                    range(min(weather_factors_method1.index), 1969 + 1)
                weather_factors_early = \
                    weather_factors_method1.loc[
                        early_years, [f'{e_type}_weather_factor']]

                weather = weather_factors_method1.drop(
                    f'{e_type}_weather_factor', axis=1)

                if e_type == 'electricity':
                    e_type = 'elec'

                type_seds = seds_data[e_type]
                weather_factors_method2 = \
                    self.national_method2_regression_models(
                        seds_data=type_seds, weather_factors=weather)
                weather_factors_method2 = \
                    weather_factors_method2.rename(
                        columns={'National': f'{e_type}_weather_factor'})

                late_years = \
                    range(1970, max(weather_factors_method2.index) + 1)
                weather_factors_late = \
                    weather_factors_method2.loc[late_years]

                weather_factors_t = \
                    pd.concat([weather_factors_early, weather_factors_late],
                              sort=True)
                weather_factors[e_type] = weather_factors_t
            return weather_factors
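
# Illustrative sketch (not part of the PNNL workflow): a toy call to
# WeatherFactors.adjust_data with synthetic degree-day data, showing how the
# long-term average degree days are blended with activity weights, i.e.
# W_heating = sum_s mean(HDD_s, 1961-90) * w_s. All numbers below are made up.
def _example_adjust_data():
    years = list(range(1961, 2011))
    hdd = pd.DataFrame({'Year': years,
                        'New England': [6500] * len(years),
                        'Middle Atlantic': [5500] * len(years)})
    cdd = pd.DataFrame({'Year': years,
                        'New England': [500] * len(years),
                        'Middle Atlantic': [900] * len(years)})
    weights = {'New England': 0.4, 'Middle Atlantic': 0.6}
    # Expected: heating = 6500*0.4 + 5500*0.6 = 5900
    #           cooling = 500*0.4 + 900*0.6 = 740
    return WeatherFactors.adjust_data(
        subregions=['New England', 'Middle Atlantic'],
        hdd_by_division=hdd,
        hdd_activity_weights=weights,
        cooling=True,
        cdd_by_division=cdd,
        cdd_activity_weights=weights)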
class CommercialIndicators(CalculateLMDI):
    """
    Data Sources:
    - New construction is based on data from Dodge Data and Analytics.
      Dodge data on new floor space additions are available from the
      published versions of the Statistical Abstract of the United States
      (SAUS). The most recent data are from the 2020 SAUS, Table 995,
      "Construction Contracts Started - Value of the Construction and Floor
      Space of Buildings by Class of Construction: 2014 to 2018".
    """
    def __init__(self, directory, output_directory, level_of_aggregation,
                 lmdi_model=['multiplicative'], end_year=2018,
                 base_year=1985):
        self.end_year = end_year
        self.sub_categories_list = {'Commercial_Total': None}
        # , 'Total_Commercial_LMDI_UtilAdj': None}
        self.eia_comm = GetEIAData('commercial')
        self.energy_types = ['elec', 'fuels', 'deliv', 'source',
                             'source_adj']
        super().__init__(sector='commercial',
                         level_of_aggregation=level_of_aggregation,
                         lmdi_models=lmdi_model,
                         directory=directory,
                         output_directory=output_directory,
                         categories_dict=self.sub_categories_list,
                         energy_types=self.energy_types,
                         base_year=base_year,
                         end_year=end_year)
        # self.cbecs =
        # self.residential_housing_units = [0]  # Use regional estimates of
        #     residential housing units as interpolator, extrapolator via
        #     regression model
        # self.mer_data23_May_2016 = GetEIAData.eia_api(id_='711251')
        #     'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711251'
        # self.mer_data23_Jan_2017 = GetEIAData.eia_api(id_='711251')
        #     'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711251'
        # self.mer_data23_Dec_2019 = GetEIAData.eia_api(id_='711251')
        #     'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711251'
        # self.AER11_Table21C_Update = GetEIAData.eia_api(id_='711251')
        #     # Estimates?

    def collect_input_data(self, dataset_name):
        datasets = \
            {'national_calibration': self.eia_comm.national_calibration(),
             'SEDS_CensusRgn': self.eia_comm.get_seds(),
             'mer_data_23': self.eia_comm.eia_api(id_='711251',
                                                  id_type='category')}
        return datasets[dataset_name]

    def adjusted_supplier_data(self):
        """
        This worksheet adjusts some of the commercial energy consumption
        data as reported in the Annual Energy Review. These adjustments are
        based upon state-by-state analysis of energy consumption in the
        industrial and commercial sectors. For electricity, there have been
        a number of reclassifications by utilities since 1990 that have
        moved sales from the industrial sector to the commercial sector.

        The adjustment for electricity consumption is based upon a
        state-by-state examination of commercial and industrial electricity
        sales from 1990 through 2011. These data are collected by EIA via
        Survey EIA-861. Significant discontinuities in the sales data from
        one year to the next were removed. In most cases, these adjustments
        caused industrial consumption to increase and commercial
        consumption to decrease. The spreadsheet with these adjustments is
        Sectoral_reclassification5.xls (10/25/2012).

        In 2009, there was a significant decline in commercial electricity
        sales in MA and a corresponding increase in industrial sales.
        Assuming that industrial consumption would have fallen by 2%
        between 2008 and 2009, the adjustment to both the commercial (+)
        and industrial (-) sectors was estimated to be 7.61 TWh. The 7.61
        TWh converted to TBtu is 26.0. This value is then added to the
        negative 164.0 TBtu in 2009 and subsequent years.

        State Energy Data System (Jan. 2017) via National Calibration
        worksheet
        """
        # 1949-1969
        published_consumption_trillion_btu = \
            self.eia_comm.eia_api(id_='TOTAL.ESCCBUS.A', id_type='series')
        # Column W (electricity retail sales to the commercial sector)
        # for years 1949-69
        published_consumption_trillion_btu = \
            published_consumption_trillion_btu.rename(
                columns={'Electricity Retail Sales to the Commercial '
                         'Sector, Annual, Trillion Btu':
                         'published_consumption_trillion_btu'})
        # 1970-2018
        national_calibration = \
            self.collect_input_data('national_calibration')
        published_consumption_trillion_btu.loc[
            '1970':, ['published_consumption_trillion_btu']] = \
            national_calibration.loc[
                '1970':, ['Final Est. (Trillion Btu)_elec']].values
        # Column G (electricity final est) for years 1970-2018

        # 1977-1989
        years = list(
            range(1977,
                  max(published_consumption_trillion_btu.index.astype(int))
                  + 1))
        years = [str(y) for y in years]
        # adjustment_to_commercial_trillion_btu_early = number_for_1990
        # Values run from 1977 through 2018
        adjustment_to_commercial_trillion_btu = \
            [9.21340312799975, 9.21340312799975, 9.21340312799975,
             9.21340312799975, 9.21340312799975, 9.21340312799975,
             9.21340312799975, 9.21340312799975, 9.21340312799975,
             9.21340312799975, 9.21340312799975, 9.21340312799975,
             9.21340312799975, 9.21340312799975, 9.21340654000005,
             29.77918535999970, 10.21012680399960, 1.70263235599987,
             -40.63866012000020, -40.63865670799990, -117.72073870000000,
             -117.72073528800000, -117.72073187600000, -117.72072846400000,
             -162.61452790400100, -136.25241618800100, -108.91594645600000,
             -125.97594304400000, -125.97593963200100, -163.95020989600000,
             -163.95020648400000, -163.95020307200000, -137.98708428968000,
             -137.98487966000100, -137.98487966000100, -137.98487966000100,
             -137.98487966000100, -137.98487966000100, -137.98487966000100,
             -137.98487966000100, -137.98487966000100, -137.98487966000100]

        adjustment_df = \
            pd.DataFrame(
                [years, adjustment_to_commercial_trillion_btu]).transpose()
        adjustment_df.columns = \
            ['Year', 'adjustment_to_commercial_trillion_btu']

        adjusted_supplier_data = \
            adjustment_df.merge(published_consumption_trillion_btu,
                                how='outer', on='Year')
        adjusted_supplier_data['adjustment_to_commercial_trillion_btu'] = \
            adjusted_supplier_data[
                'adjustment_to_commercial_trillion_btu'].fillna(0)
        adjusted_supplier_data = adjusted_supplier_data.set_index('Year')
        adjusted_supplier_data['adjusted_consumption_trillion_btu'] = \
            adjusted_supplier_data[
                'adjustment_to_commercial_trillion_btu'].add(
                adjusted_supplier_data[
                    'published_consumption_trillion_btu'])
        adjusted_supplier_data['adjusted_consumption_trillion_btu'] = \
            adjusted_supplier_data[
                'adjusted_consumption_trillion_btu'].astype(float)
        adjusted_supplier_data = \
            adjusted_supplier_data.sort_index(ascending=True)

        return adjusted_supplier_data[['adjusted_consumption_trillion_btu']]

    @staticmethod
    def get_saus():
        """Get data from the Statistical Abstract of the United States
        (SAUS).
        """
        print('os.getcwd():', os.getcwd())
        try:
            saus_2002 = \
                pd.read_csv(
                    './EnergyIntensityIndicators/Data/SAUS2002_table995.csv'
                ).set_index('Year')
        except FileNotFoundError:
            os.chdir('..')
            saus_2002 = \
                pd.read_csv(
                    './EnergyIntensityIndicators/Data/SAUS2002_table995.csv'
                ).set_index('Year')

        saus_1994 = {
            1980: 738, 1981: 787, 1982: 631, 1983: 716, 1984: 901,
            1985: 1039, 1986: 960, 1987: 933, 1988: 883, 1989: 867,
            1990: 694, 1991: 477, 1992: 462, 1993: 479}
        saus_2001 = {
            1980: 738, 1981: None, 1982: None, 1983: None, 1984: None,
            1985: 1039, 1986: None, 1987: None, 1988: None, 1989: 867,
            1990: 694, 1991: 476, 1992: 462, 1993: 481, 1994: 600,
            1995: 700, 1996: 723, 1997: 855, 1998: 1106, 1999: 1117,
            2000: 1176}

        saus_merged = dict()
        for year, value in saus_2001.items():
            if value is None:
                set_value = saus_1994[year]
            else:
                set_value = value
            saus_merged[year] = set_value

        saus_merged_df = \
            pd.DataFrame.from_dict(saus_merged, orient='index',
                                   columns=['Value'])
        return saus_2002, saus_merged_df

    @staticmethod
    def dod_compare_old():
        """DODCompareOld

        Note from PNNL (David B. Belzer): "These series are of unknown
        origin--need to check Jackson and Johnson 197 (sic)?"
        """
        dod_old = pd.read_csv(
            './EnergyIntensityIndicators/Data/DODCompareOld.csv'
        ).set_index('Year')
        cols_list = ['Retail', 'Auto R', 'Office', 'Warehouse']
        dod_old['Commercial'] = dod_old[cols_list].sum(axis=1)
        dod_old_subset = dod_old.loc[list(range(1960, 1982)), cols_list]
        dod_old_hotel = dod_old.loc[list(range(1980, 1990)), ['Commercial']]
        return dod_old, dod_old_subset, dod_old_hotel

    def dodge_adjustment_ratios(self, dodge_dataframe, start_year, stop_year,
                                adjust_years, late):
        """(1985, 1990) or (1960, 1970)
        """
        year_indices = self.years_to_str(start_year, stop_year)
        revision_factor_commercial = \
            dodge_dataframe.loc[year_indices, ['Commercial']].sum(
                axis=0).values
        categories = ['Retail', 'Auto R', 'Office', 'Warehouse', 'Hotel']

        if late:
            col = 'Commercial'
        else:
            col = 'Commercial, Excl Hotel'

        for category in categories:
            revision_factor_cat = \
                dodge_dataframe.loc[year_indices, [category]].sum(
                    axis=0).values / revision_factor_commercial
            dodge_dataframe.loc[adjust_years, [category]] = \
                dodge_dataframe.loc[adjust_years, [col]].values * \
                revision_factor_cat[0]

        return dodge_dataframe

    def west_inflation(self):
        """Jackson and Johnson Estimate of West Census Region Inflation
        Factor, West Region Shares Based on CBECS.

        Note from PNNL: "Staff: Based upon CBECS, the percentage of
        construction in west census region was slightly greater in the
        1900-1919 period than in 1920-1945.
        Thus, factor is set to 1.12, approximately same as 1925 value
        published by Jackson and Johnson"
        """
        # hist_stat = self.hist_stat()['Commercial (million SF)']
        #     # hist_stat column E
        # west inflation column Q
        ornl_78 = {1925: 1.127, 1930: 1.144, 1935: 1.12, 1940: 1.182,
                   1945: 1.393, 1950: 1.216, 1951: 1.237, 1952: 1.224,
                   1953: 1.209, 1954: 1.213, 1955: 1.229}

        all_years = list(range(min(ornl_78.keys()), max(ornl_78.keys()) + 1))
        increment_years = list(ornl_78.keys())

        final_factors = {year: 1.12 for year in list(range(1919, 1925))}
        for index, y_ in enumerate(increment_years):
            if index > 0:
                year_before = increment_years[index - 1]
                num_years = y_ - year_before
                infl_factor_year_before = ornl_78[year_before]
                infl_factor_y_ = ornl_78[y_]
                increment = 1 / num_years
                for delta in range(num_years):
                    # linear interpolation between benchmark years
                    value = \
                        infl_factor_year_before * (1 - increment * delta) + \
                        infl_factor_y_ * (increment * delta)
                    year = year_before + delta
                    final_factors[year] = value

        final_factors_df = \
            pd.DataFrame.from_dict(final_factors,
                                   columns=['Final Factors'],
                                   orient='index')
        return final_factors_df

    @staticmethod
    def years_to_str(start_year, end_year):
        """Create a list of year strings from the start_year-end_year range.
        """
        list_ = list(range(start_year, end_year + 1))
        return [str(l) for l in list_]

    def hist_stat(self):
        """Historical Dodge data through 1970.

        Data Source: Series N 90-100, Historical Statistics of the U.S.,
        Colonial Times to 1970
        """
        historical_dodge = pd.read_csv(
            './EnergyIntensityIndicators/Data/historical_dodge_data.csv'
        ).set_index('Year')
        pub_inst_values = \
            historical_dodge.loc[list(range(1919, 1925)),
                                 ['Pub&Institutional']].values
        total_1925_6 = \
            pd.DataFrame.sum(historical_dodge.loc[list(range(1925, 1927)), ],
                             axis=0).drop(index='Commercial (million SF)')
        inst_pub_total_1925 = \
            pd.DataFrame.sum(historical_dodge.loc[1925, ].drop(
                'Commercial (million SF)'), axis=0)
        inst_pub_total_1926 = \
            pd.DataFrame.sum(historical_dodge.loc[1926, ].drop(
                'Commercial (million SF)'), axis=0)
        inst_pub_total_1925_6 = inst_pub_total_1925 + inst_pub_total_1926

        shares = total_1925_6.divide(inst_pub_total_1925_6)

        for col in list(total_1925_6.index):
            values = \
                historical_dodge.loc[list(range(1919, 1925)),
                                     ['Pub&Institutional']].multiply(
                    shares[col]).values
            # .at requires scalar access; .loc handles the list of years
            historical_dodge.loc[list(range(1919, 1925)), col] = values

        historical_dodge.loc[list(range(1919, 1925)),
                             ['Pub&Institutional']] = pub_inst_values
        return historical_dodge

    def hist_stat_adj(self):
        """Adjust historical Dodge data to account for the omission of data
        for the West Census Region prior to 1956.
        """
        hist_data = self.hist_stat()
        west_inflation = self.west_inflation()
        hist_data = hist_data.merge(west_inflation, how='outer',
                                    left_index=True, right_index=True)
        hist_data['Final Factors'] = hist_data['Final Factors'].fillna(1)
        adjusted_for_west = \
            hist_data.drop(columns=['Final Factors',
                                    'Pub&Institutional']).multiply(
                hist_data['Final Factors'].values, axis=0)
        return adjusted_for_west.loc[list(range(1919, 1960)), :]

    def dodge_revised(self):
        """Dodge Additions, adjusted for the omission of the West Census
        Region prior to 1956.
        """
        saus_2002, saus_merged = self.get_saus()
        dod_old, dod_old_subset, dod_old_hotel = self.dod_compare_old()
        west_inflation = self.hist_stat_adj()

        dodge_revised = pd.read_csv(
            './EnergyIntensityIndicators/Data/Dodge_Data.csv').set_index(
            'Year')
        dodge_revised.index = dodge_revised.index.astype(str)

        dodge_revised = dodge_revised.reindex(
            dodge_revised.columns.tolist() +
            ['Commercial, Excl Hotel', 'Hotel'], axis=1).fillna(np.nan)

        # was (1919, 1990); 1990 and later are covered by the SAUS-based
        # series assigned below
        years_1919_1989 = self.years_to_str(1919, 1989)
        years_1990_1997 = self.years_to_str(1990, 1997)

        dodge_revised.loc[self.years_to_str(1960, 1981),
                          ['Retail', 'Auto R', 'Office', 'Warehouse']] = \
            dod_old_subset.values

        dodge_revised.loc[self.years_to_str(1919, 1959),
                          ['Commercial, Excl Hotel']] = \
            west_inflation['Commercial (million SF)'].values.reshape(41, 1)
        # hist_stat_adj column Q

        hist_adj_cols = ['Education', 'Hospital', 'Public', 'Religious',
                         'Soc/Amuse', 'Misc']
        dodge_revised.loc[self.years_to_str(1919, 1959), hist_adj_cols] = \
            west_inflation.drop('Commercial (million SF)', axis=1).values

        dodge_revised.loc[self.years_to_str(1990, 1998), hist_adj_cols] = \
            saus_2002.loc[self.years_to_str(1990, 1998),
                          ['Educational', 'Health', 'Pub. Bldg',
                           'Religious', 'Soc/Rec', 'Misc.']].values

        # left side was (1990, 2003), which does not match the 1990-2001
        # rows selected on the right
        dodge_revised.loc[self.years_to_str(1990, 2001), ['Soc/Misc']] = \
            saus_2002.loc[self.years_to_str(1990, 2001), ['Soc/Rec']].add(
                saus_2002.loc[self.years_to_str(1990, 2001),
                              ['Misc.']].values)
        dodge_revised.loc[self.years_to_str(1999, 2001), 'Misc'] = \
            saus_2002.loc[self.years_to_str(1999, 2001),
                          ['Misc.']].values.reshape(3, )
        dodge_revised.loc[self.years_to_str(1961, 1989), 'Misc'] = \
            dodge_revised.loc[self.years_to_str(1961, 1989),
                              'Soc/Misc'].subtract(
                dodge_revised.loc[self.years_to_str(1961, 1989),
                                  'Soc/Amuse'].values)
        dodge_revised.loc[str(2000), 'Hospital'] = \
            saus_2002.loc[str(2000), 'Health']

        dodge_revised.loc[self.years_to_str(1960, 1989),
                          ['Commercial, Excl Hotel']] = \
            dodge_revised.loc[self.years_to_str(1960, 1989),
                              ['Retail', 'Auto R', 'Office',
                               'Warehouse']].sum(axis=1)

        hotel_80_89 = \
            saus_merged.loc[list(range(1980, 1989 + 1)),
                            ['Value']].subtract(dod_old_hotel.values)
        dodge_revised.loc[self.years_to_str(1980, 1989), ['Hotel']] = \
            hotel_80_89

        hotel_80_89_ratio = \
            hotel_80_89.sum(axis=0).values / \
            dodge_revised.loc[self.years_to_str(1980, 1989),
                              ['Commercial, Excl Hotel']].sum(axis=0).values

        dodge_revised.loc[self.years_to_str(1919, 1979), ['Hotel']] = \
            dodge_revised.loc[self.years_to_str(1919, 1979),
                              ['Commercial, Excl Hotel']].values * \
            hotel_80_89_ratio

        dodge_revised.loc[years_1990_1997, ['Commercial, Incl Hotel']] = \
            saus_2002.loc[years_1990_1997, ['Commercial']].values
        dodge_revised.loc[self.years_to_str(1985, 1989), ['Commercial']] = \
            saus_merged.loc[list(range(1985, 1989 + 1)), ['Value']].values
        dodge_revised.loc[self.years_to_str(1990, 2018), ['Commercial']] = \
            dodge_revised.loc[self.years_to_str(1990, 2018),
                              ['Commercial, Incl Hotel']].values

        dodge_revised = \
            self.dodge_adjustment_ratios(
                dodge_revised, 1960, 1969,
                adjust_years=self.years_to_str(1919, 1959), late=False)
        dodge_revised = \
            self.dodge_adjustment_ratios(
                dodge_revised, 1985, 1989,
                adjust_years=self.years_to_str(1990, 2018), late=True)

        dodge_revised.loc[years_1919_1989, ['Commercial, Incl Hotel']] = \
            dodge_revised.loc[years_1919_1989,
                              ['Commercial, Excl Hotel']].add(
                dodge_revised.loc[years_1919_1989, ['Hotel']].values)

        dodge_revised['Total'] = \
            dodge_revised.drop(['Commercial, Incl Hotel',
                                'Commercial, Excl Hotel'],
                               axis=1).sum(axis=1).values
        return dodge_revised

    def dodge_to_cbecs(self):
        """Redefine the Dodge building categories more along the lines of
        CBECS categories. Constant fractions of floor space are moved among
        categories.

        Returns:
            dodge_to_cbecs (dataframe): redefined data
        """
        # Key assumptions:
        education_floor_space_office = .10
        auto_repair_retail = .80
        retail_merc_service = .80  # remainder to food service and sales
        retail_merc_service_food_sales = .11
        retail_merc_service_food_service = .90
        education_assembly = .05
        education_misc = .05  # (laboratories)
        # 25% of Hospital goes to lodging (nursing homes)
        health_transfered_to_cbecs_health = .75
        misc_public_assembly = .10  # (passenger terminals)

        dodge_revised = self.dodge_revised()  # dataframe

        dodge_to_cbecs = \
            pd.DataFrame(
                dodge_revised[['Total', 'Religious', 'Warehouse']]).rename(
                columns={'Total': 'Dodge_Totals'})
        dodge_to_cbecs['Office'] = \
            dodge_revised['Office'] + \
            education_floor_space_office * dodge_revised['Education']
        dodge_to_cbecs['Merc/Serv'] = \
            retail_merc_service * (
                dodge_revised['Retail'] +
                auto_repair_retail * dodge_revised['Auto R'])
        dodge_to_cbecs['Food_Sales'] = \
            retail_merc_service_food_sales * (
                dodge_revised['Retail'] +
                auto_repair_retail * dodge_revised['Auto R'])
        dodge_to_cbecs['Food_Serv'] = \
            retail_merc_service_food_service * (
                dodge_revised['Retail'] +
                auto_repair_retail * dodge_revised['Auto R'])
        dodge_to_cbecs['Education'] = \
            (1 - education_floor_space_office - education_assembly -
             education_misc) * dodge_revised['Education']
        dodge_to_cbecs['Health'] = \
            health_transfered_to_cbecs_health * dodge_revised['Hospital']
        dodge_to_cbecs['Lodging'] = \
            dodge_revised['Hotel'] + \
            (1 - health_transfered_to_cbecs_health) * \
            dodge_revised['Hospital']
        dodge_to_cbecs['Assembly'] = \
            dodge_revised['Soc/Amuse'] + \
            misc_public_assembly * dodge_revised['Misc'] + \
            education_assembly * dodge_revised['Education']
        dodge_to_cbecs['Other'] = \
            dodge_revised['Public'] + \
            (1 - misc_public_assembly) * dodge_revised['Misc'] + \
            (1 - auto_repair_retail) * dodge_revised['Auto R'] + \
            education_misc * dodge_revised['Education']
        dodge_to_cbecs['Redefined_Totals'] = \
            dodge_to_cbecs.drop('Dodge_Totals', axis=1).sum(axis=1).values
        # dodge_to_cbecs = dodge_to_cbecs.drop()  # don't need totals?
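        # 'Redefined_Totals' is the column nems_logistic() below draws on
        # for post-1920 floorspace additions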
        return dodge_to_cbecs

    def nems_logistic(self, dataframe, params):
        """
        PNNL errors found:
        - Column S in spreadsheet has CO-StatePop2.xls data incorrectly
          aligned with years
        - Column AL does not actually scale by 1.28 as suggested in the
          column header
        """
        current_year = dt.datetime.now().year

        link_factors = \
            pd.read_excel(f'{self.directory}/CO-EST_statepop2.xls',
                          sheet_name='Stock', usecols='D:E', header=1,
                          skiprows=158).rename(columns={1789: 'Year'})
        state_pop = \
            link_factors.set_index('Year').rename(
                columns={' New': 'state_pop'})
        state_pop = state_pop[state_pop.index.notnull()]
        state_pop.index = state_pop.index.astype(int)
        state_pop.index = state_pop.index.astype(str)

        dataframe = dataframe.merge(state_pop, how='outer', left_index=True,
                                    right_index=True)
        dataframe = dataframe[dataframe.index.notnull()]
        dataframe = dataframe.reindex(
            columns=dataframe.columns.tolist() +
            ['adjusted_state_pop', 'adjusted_state_pop_scaled_b',
             'adjusted_state_pop_scaled_c', 'scaled_additions_estimate_a',
             'scaled_additions_estimate_b', 'scaled_additions_estimate_c',
             'removal', 'adjusted_removals', 'old_stk_retain',
             'floorspace_bsf'])
        dataframe['Year_Int'] = dataframe.index.astype(int)
        dataframe['age'] = \
            dataframe['Year_Int'].subtract(current_year).multiply(-1)
        # logistic survival curve:
        #     remaining = 1 / (1 + (age / lifetime) ** gamma)
        dataframe['remaining'] = \
            ((dataframe['age'].divide(params[1])).pow(
                params[0]).add(1)).pow(-1)
        dataframe['inflate_fac'] = dataframe['remaining'].pow(-1)

        link_factor = 0.1
        adjusted_state_pop_1 = 40

        timing_wgts_current_yr = 0.4
        timing_wgts_lag_yr = 0.6
        benchmark_factor = 1

        dataframe.loc[str(1838), ['state_pop']] = 400

        dataframe.loc[self.years_to_str(1838, 1919),
                      ['adjusted_state_pop']] = \
            dataframe.loc[self.years_to_str(1838, 1919),
                          ['state_pop']].values * link_factor
        dataframe.loc[self.years_to_str(1920, current_year),
                      ['adjusted_state_pop']] = \
            dataframe.loc[self.years_to_str(1920, current_year),
                          ['Redefined_Totals']].values

        for year in self.years_to_str(1838, current_year):
            adjusted_state_pop_value = \
                dataframe.loc[year, ['adjusted_state_pop']].values
            if year == '1838':
                vpip_estimate = adjusted_state_pop_value
            elif year == '1920':
                vpip_estimate = adjusted_state_pop_value
            else:
                adjusted_state_pop_year_before = \
                    dataframe.loc[str(int(year) - 1),
                                  ['adjusted_state_pop']].values
                vpip_estimate = \
                    (timing_wgts_current_yr * adjusted_state_pop_value +
                     timing_wgts_lag_yr * adjusted_state_pop_year_before) * \
                    benchmark_factor
            dataframe.loc[year, 'VPIP-Estimate'] = vpip_estimate

        _variable = 1.2569  # This should be solved for
        x_column_value = _variable
        db_estimates = 1.2
        db_estimates2 = [1.25 - 0.01 * d for d in list(range(1990, 2021))]
        post_1989_scaling_factor = db_estimates  # Should choose this
        variable_2 = 1.533  # This should be solved for

        without_lags = \
            dataframe.loc[self.years_to_str(1990, current_year),
                          ['adjusted_state_pop']].multiply(
                post_1989_scaling_factor)

        dataframe.loc[:str(1989), ['scaled_additions_estimate_a']] = \
            dataframe.loc[:str(1989), ['VPIP-Estimate']].values * variable_2
        dataframe.loc[self.years_to_str(1990, current_year),
                      ['scaled_additions_estimate_a']] = \
            dataframe.loc[self.years_to_str(1990, current_year),
                          ['VPIP-Estimate']].values * \
            post_1989_scaling_factor

        dataframe.loc[:str(1989), ['adjusted_state_pop_scaled_b']] = \
            dataframe.loc[self.years_to_str(1790, 1989),
                          ['scaled_additions_estimate_a']].values
        dataframe.loc[self.years_to_str(1990, 2001),
                      ['adjusted_state_pop_scaled_b']] = \
            dataframe.loc[self.years_to_str(1990, 2001),
                          ['scaled_additions_estimate_a']].values * 1.15
        dataframe.loc[self.years_to_str(2002, current_year),
                      ['adjusted_state_pop_scaled_b']] = \
            dataframe.loc[self.years_to_str(2002, current_year),
                          ['scaled_additions_estimate_a']].values

        dataframe.loc[str(1790), ['adjusted_state_pop_scaled_c']] = 1
        dataframe.loc[self.years_to_str(1791, 1989),
                      ['adjusted_state_pop_scaled_c']] = \
            dataframe.loc[self.years_to_str(1791, 1989),
                          ['scaled_additions_estimate_a']].values
        dataframe.loc[self.years_to_str(1990, 2001),
                      ['adjusted_state_pop_scaled_c']] = \
            dataframe.loc[self.years_to_str(1990, 2001),
                          ['scaled_additions_estimate_a']].values * 1.28
        dataframe.loc[self.years_to_str(2002, current_year),
                      ['adjusted_state_pop_scaled_c']] = \
            dataframe.loc[self.years_to_str(2002, current_year),
                          ['scaled_additions_estimate_a']].values

        for y in self.years_to_str(1839, current_year):
            years_diff = current_year - 1870
            start_year = int(y) - years_diff
            # first_index_year = int(dataframe.index[0])
            # if start_year < first_index_year:
            #     start_year = first_index_year
            year_index = self.years_to_str(start_year, int(y))
            remaining = \
                dataframe.loc[self.years_to_str(1870, current_year),
                              ['remaining']].values.flatten()
            adjusted_state_pop_scaled_b = \
                dataframe.loc[year_index,
                              ['adjusted_state_pop_scaled_b']].fillna(
                    0).values.flatten()
            adjusted_state_pop_scaled_c = \
                dataframe.loc[year_index,
                              ['adjusted_state_pop_scaled_c']].fillna(
                    0).values.flatten()
            b_value = np.dot(adjusted_state_pop_scaled_b, remaining)
            c_value = np.dot(adjusted_state_pop_scaled_c, remaining)
            dataframe.loc[y, ['scaled_additions_estimate_b']] = b_value
            dataframe.loc[y, ['scaled_additions_estimate_c']] = c_value

        removal_chg = 1  # Not sure what this is about
        fractions = [0.3, 0.4, 0.4, 0.35, 0.35, 0.35, 0.35, 0.3, 0.3, 0.3]
        fraction_retained = [f * removal_chg for f in fractions]

        for i in dataframe.index:
            if i >= '1870':
                removal = \
                    dataframe.loc[
                        i, ['scaled_additions_estimate_c']].values - \
                    dataframe.loc[
                        str(int(i) - 1),
                        ['scaled_additions_estimate_c']].values - \
                    dataframe.loc[
                        i, ['scaled_additions_estimate_a']].values
                dataframe.loc[i, ['removal']] = removal

        dataframe.loc[self.years_to_str(2009, 2009 + len(fractions) - 1),
                      ['adjusted_removals']] = \
            dataframe.loc[
                self.years_to_str(2009, 2009 + len(fractions) - 1),
                ['removal']].values.flatten() * fraction_retained

        for y_ in list(range(2009, 2009 + len(fractions))):
            if y_ == 2009:
                dataframe.loc[str(y_), ['old_stk_retain']] = \
                    dataframe.loc[str(y_), ['adjusted_removals']]
            else:
                dataframe.loc[str(y_), ['old_stk_retain']] = \
                    dataframe.loc[str(y_ - 1),
                                  ['old_stk_retain']].values + \
                    dataframe.loc[str(y_), ['adjusted_removals']].values

        dataframe['adjusted_removals'] = \
            dataframe['adjusted_removals'].fillna(0)

        dataframe.loc[self.years_to_str(1960, current_year),
                      ['floorspace_bsf']] = \
            dataframe.loc[self.years_to_str(1960, current_year),
                          ['scaled_additions_estimate_c']].values - \
            dataframe.loc[self.years_to_str(1960, current_year),
                          ['adjusted_removals']].values

        return dataframe[['floorspace_bsf']].dropna()

    def solve_logistic(self, dataframe):
        """Solve for the NEMS logistic parameters.
        """
        pnnl_coefficients = \
            [3.92276415015621, 73.2238120168849]  # [gamma, lifetime]
        # popt, pcov = curve_fit(self.nems_logistic, xdata=dataframe[],
        #                        ydata=dataframe[], p0=pnnl_coefficients)
        # return popt
        return pnnl_coefficients

    def activity(self):
        """Use logistic parameters to find predicted historical floorspace.

        Returns:
            historical_floorspace_billion_sq_feet (pd.DataFrame):
                historical floorspace in the Commercial Sector.
                Years:
                Units: Billion Square Feet
                Data Source:
        """
        dodge_to_cbecs = self.dodge_to_cbecs()
        # columns c-m starting with year 1920 (row 17)
        coeffs = self.solve_logistic(dodge_to_cbecs)
        historical_floorspace_late = \
            self.nems_logistic(dodge_to_cbecs, coeffs)  # properly formatted?

        historical_floorspace_early = \
            {1949: 27235.1487296062, 1950: 27788.6370796569,
             1951: 28246.642791733, 1952: 28701.4989706012,
             1953: 29253.2282427217, 1954: 29913.8330998026,
             1955: 30679.7157232176, 1956: 31512.6191323126,
             1957: 32345.382764321, 1958: 33206.8483392728,
             1959: 34088.6640247816}
        historical_floorspace_early = \
            pd.DataFrame.from_dict(historical_floorspace_early,
                                   columns=['floorspace_bsf'],
                                   orient='index')
        historical_floorspace_early.index = \
            historical_floorspace_early.index.astype(str)

        historical_floorspace = \
            pd.concat([historical_floorspace_early,
                       historical_floorspace_late])
        historical_floorspace_billion_sq_feet = \
            historical_floorspace.multiply(0.001)
        return historical_floorspace_billion_sq_feet

    def fuel_electricity_consumption(self):
        """Trillion Btu

        Returns:
            energy_data (dict): Dictionary of dataframes with keys 'elec'
            and 'fuels'
        """
        year_range = list(range(1949, 1970))
        year_range = [str(y) for y in year_range]
        national_calibration = \
            self.collect_input_data('national_calibration')

        total_primary_energy_consumption = \
            self.eia_comm.eia_api(id_='TOTAL.TXCCBUS.A', id_type='series')
        # pre-1969: AER table 2.1c update column U
        total_primary_energy_consumption = \
            total_primary_energy_consumption.rename(
                columns={'Total Primary Energy Consumed by the Commercial '
                         'Sector, Annual, Trillion Btu': 'total_primary'})
        # total_primary_energy_consumption = \
        #     total_primary_energy_consumption[
        #         total_primary_energy_consumption.index.isin(year_range)]
        # total_primary_energy_consumption = \
        #     total_primary_energy_consumption.multiply(0.001)

        fuels_dataframe = total_primary_energy_consumption.copy()
        replacement_data = \
            national_calibration.loc['1970':,
                                     ['Final Est. (Trillion Btu)_fuels']]
        # >= 1970: National Calibration column O
        fuels_dataframe.loc['1970':, ['total_primary']] = \
            replacement_data.values
        fuels_dataframe = \
            fuels_dataframe.rename(
                columns={'total_primary':
                         'adjusted_consumption_trillion_btu'})
        fuels_dataframe['adjusted_consumption_trillion_btu'] = \
            fuels_dataframe[
                'adjusted_consumption_trillion_btu'].astype(float)

        elec_dataframe = self.adjusted_supplier_data()

        energy_data = {'elec': elec_dataframe, 'fuels': fuels_dataframe}
        return energy_data

    def get_seds(self):
        """Collect SEDS data.

        Returns:
            data (dict): Dictionary of dataframes with keys 'elec' and
            'fuels'
        """
        seds = self.collect_input_data('SEDS_CensusRgn')
        census_regions = {4: 'West', 3: 'South', 2: 'Midwest',
                          1: 'Northeast'}
        total_fuels = seds[0].rename(columns=census_regions)
        elec = seds[1].rename(columns=census_regions)
        data = {'elec': elec, 'fuels': total_fuels}
        return data

    def collect_weather(self, comm_activity):
        """Gather weather data for the Commercial sector.

        Args:
            comm_activity ([type]): [description]

        Returns:
            weather_data (dict): [description]
        """
        seds = self.get_seds()
        res = \
            ResidentialIndicators(directory=self.directory,
                                  output_directory=self.output_directory,
                                  base_year=self.base_year)
        # residential_activity_data = res.get_floorspace()
        # residential_floorspace = \
        #     residential_activity_data['floorspace_square_feet']
        weather = \
            WeatherFactors(sector='commercial',
                           directory=self.directory,
                           activity_data=comm_activity)
        # residential_floorspace=residential_floorspace)

        weather_factors = weather.get_weather(seds_data=seds)
        # weather_factors = weather.adjust_for_weather()
        # What should this return?? (e.g. weather factors or weather
        # adjusted data, both?)

        weather_data = dict()
        for key, value in weather_factors.items():
            value = value.drop('electricity_weather_factor', axis=1,
                               errors='ignore')
            weather_data[key] = value

        return weather_data

    def collect_data(self):
        """Gather decomposition input data for the Commercial sector.

        Returns:
            data_dict (dict): Commercial Sector data input to the LMDI
            model
        """
        # Activity: Floorspace_Estimates column U, B
        # Energy: Elec --> Adjusted Supplier Data Column D
        #         Fuels --> AER11 Table 2.1C_Update column U,
        #                   National Calibration Column O
        activity_data = self.activity()
        print('Activity data collected without issue')

        energy_data = self.fuel_electricity_consumption()
        print('Energy data collected without issue')

        weather_factors = \
            self.collect_weather(comm_activity=activity_data)

        data_dict = {'Commercial_Total': {'energy': energy_data,
                                          'activity': activity_data,
                                          'weather_factors':
                                              weather_factors}}
        return data_dict

    def main(self, breakout, calculate_lmdi):
        """Decompose energy use for the Commercial sector.

        Args:
            breakout ([type]): [description]
            calculate_lmdi ([type]): [description]

        Returns:
            [type]: [description]
        """
        data_dict = self.collect_data()
        results_dict, formatted_results = \
            self.get_nested_lmdi(
                level_of_aggregation=self.level_of_aggregation,
                breakout=breakout, calculate_lmdi=calculate_lmdi,
                raw_data=data_dict, lmdi_type='LMDI-I')
        return results_dict
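
# Illustrative sketch (not part of the PNNL workflow): the building-survival
# curve at the heart of nems_logistic, remaining = 1 / (1 + (age / lifetime)
# ** gamma), evaluated at the fixed parameters that solve_logistic returns.
# Purely for intuition about how retention falls with building age.
def _example_survival_curve():
    gamma, lifetime = 3.92276415015621, 73.2238120168849
    ages = np.array([10, 50, 73, 100, 150])
    remaining = 1.0 / (1.0 + (ages / lifetime) ** gamma)
    # At age == lifetime (~73 years), roughly half the stock remains
    return dict(zip(ages.tolist(), remaining.round(3).tolist()))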
class ResidentialIndicators(CalculateLMDI):
    """Class to decompose changes in Energy Consumption from the
    Residential Sector of the US Economy.
    """
    def __init__(self, directory, output_directory,
                 level_of_aggregation=None, lmdi_model='multiplicative',
                 base_year=1985, end_year=2018):
        self.eia_res = GetEIAData('residential')
        housing_types = \
            {'Single-Family': None, 'Multi-Family': None,
             'Manufactured-Homes': None}
        self.sub_categories_list = \
            {'National': {'Northeast': housing_types,
                          'Midwest': housing_types,
                          'South': housing_types,
                          'West': housing_types}}
        self.national_calibration = \
            self.eia_res.national_calibration()
        self.seds_census_region = \
            self.eia_res.get_seds()  # regional energy consumption data
        RF = ResidentialFloorspace()
        self.ahs_Data = RF.update_ahs_data()
        self.regions = ['Northeast', 'South', 'West', 'Midwest', 'National']
        self.base_year = base_year
        self.directory = directory
        self.end_year = end_year
        self.energy_types = ['elec', 'fuels', 'deliv', 'source']

        super().__init__(sector='residential',
                         level_of_aggregation=level_of_aggregation,
                         lmdi_models=lmdi_model,
                         categories_dict=self.sub_categories_list,
                         energy_types=self.energy_types,
                         directory=directory,
                         output_directory=output_directory,
                         primary_activity='occupied_housing_units',
                         base_year=base_year, end_year=end_year,
                         weather_activity='floorspace_square_feet')
        print("self.dir()):", dir(self))

        # self.AER11_table2_1b_update = GetEIAData.eia_api(id_='711250')
        #     'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711250'
        # self.AnnualData_MER_22_Dec2019 = GetEIAData.eia_api(id_='711250')
        #     'http://api.eia.gov/category/?api_key=YOUR_API_KEY_HERE&category_id=711250' ?
        # self.RECS_intensity_data =
        #     # '711250' for Residential Sector Energy Consumption

    def get_seds(self):
        """Collect SEDS data.

        Returns:
            total_fuels (pd.DataFrame): Fuels data
            elec (pd.DataFrame): Elec data
        """
        census_regions = {4: 'West', 3: 'South', 2: 'Midwest',
                          1: 'Northeast'}
        total_fuels = \
            self.seds_census_region[0].rename(columns=census_regions)
        elec = self.seds_census_region[1].rename(columns=census_regions)
        return total_fuels, elec

    def fuel_electricity_consumption(self, total_fuels, elec, region):
        """Combine energy datasets into one energy consumption dataframe in
        Trillion Btu.

        Data Source: EIA's State Energy Data System (SEDS)

        Args:
            total_fuels ([type]): [description]
            elec ([type]): [description]
            region ([type]): [description]

        Returns:
            energy_data (dict): [description]
        """
        fuels_dataframe = total_fuels[[region]]
        elec_dataframe = elec[[region]]

        energy_data = {'elec': elec_dataframe, 'fuels': fuels_dataframe}
        return energy_data

    def get_floorspace(self):
        """Collect floorspace data for the Residential sector.

        Returns:
            final_floorspace_results (dict): [description]
        """
        residential_data = ResidentialFloorspace(end_year=self.end_year)
        floorspace_square_feet, \
            occupied_housing_units, \
            household_size_square_feet_per_hu = \
                residential_data.final_floorspace_estimates()

        final_floorspace_results = \
            {'occupied_housing_units': occupied_housing_units,
             'floorspace_square_feet': floorspace_square_feet,
             'household_size_square_feet_per_hu':
                 household_size_square_feet_per_hu}
        return final_floorspace_results

    def activity(self, floorspace):
        """Combine the floorspace datasets into one activity dataset of
        occupied housing units.

        Args:
            floorspace ([type]): [description]

        Returns:
            all_activity (dict): [description]
        """
        all_activity = dict()
        for region in self.sub_categories_list['National'].keys():
            region_activity = dict()
            for variable, data in floorspace.items():
                df = data[region]
                if variable == 'household_size_square_feet_per_hu':
                    df = df.rename(
                        columns={'avg_size_sqft_mf': 'Multi-Family',
                                 'avg_size_sqft_mh': 'Manufactured-Homes',
                                 'avg_size_sqft_sf': 'Single-Family'})
                else:
                    df = df.rename(
                        columns={'occupied_units_mf': 'Multi-Family',
                                 'occupied_units_mh': 'Manufactured-Homes',
                                 'occupied_units_sf': 'Single-Family'})
                region_activity[variable] = df
            all_activity[region] = region_activity
        return all_activity

    def collect_weather(self, energy_dict, nominal_energy_intensity):
        """Collect weather data for the Residential sector

        Args:
            energy_dict (dict): Energy consumption data by energy type
            nominal_energy_intensity (dict): Nominal energy intensity
                by region and energy type

        Returns:
            weather_factors (dict): Weather factors by energy type
        """
        weather = \
            WeatherFactors(sector='residential',
                           directory=self.directory,
                           nominal_energy_intensity=nominal_energy_intensity)
        # What should this return? (e.g. weather factors,
        # weather-adjusted data, or both?)
        weather_factors = weather.get_weather(energy_dict, weather_adjust=False)
        return weather_factors

    def collect_data(self):
        """Gather all input data for use in the decomposition of
        energy use for the Residential sector

        Returns:
            all_data (dict): All input data for the Residential
                sector energy decomposition
        """
        total_fuels, elec = self.get_seds()
        floorspace = self.get_floorspace()
        activity = self.activity(floorspace)
        all_data = dict()
        nominal_energy_intensity_by_r = dict()
        for r in self.sub_categories_list['National'].keys():
            region_activity = activity[r]
            energy_data = self.fuel_electricity_consumption(total_fuels, elec,
                                                            region=r)
            nominal_energy_intensity_by_e = dict()
            for e, e_df in energy_data.items():
                e_df = e_df.rename_axis(columns=None)
                # Distinct name so the floorspace dict collected above
                # is not shadowed
                region_floorspace = region_activity['floorspace_square_feet']
                total_floorspace = region_floorspace.sum(axis=1)
                nominal_energy_intensity = \
                    self.nominal_energy_intensity(
                        energy_input_data=e_df,
                        activity_data_=total_floorspace)
                nominal_energy_intensity_by_e[e] = nominal_energy_intensity
            region_data = {'energy': energy_data, 'activity': region_activity}
            nominal_energy_intensity_by_r[r] = nominal_energy_intensity_by_e
            all_data[r] = region_data
        # need to integrate this into the data passed to LMDI
        weather_factors = self.collect_weather(
            energy_dict=energy_data,
            nominal_energy_intensity=nominal_energy_intensity_by_r)
        national_weather_dict = dict()
        for region, r_dict_ in all_data.items():
            weather_factors_by_e_type = dict()
            for e_ in r_dict_['energy'].keys():
                national_weather_dict[e_] = \
                    weather_factors[e_][[f'{e_}_weather_factor']]
                e_r_weather = \
                    weather_factors[e_][[f'{region.lower()}_weather_factor']]
                weather_factors_by_e_type[e_] = e_r_weather
            r_dict_['weather_factors'] = weather_factors_by_e_type
            all_data[region] = r_dict_
        all_data = {'National': all_data}
        return all_data

    def main(self, breakout, calculate_lmdi):
        """Calculate decomposition for the Residential sector

        Args:
            breakout (bool): Whether to break out results by category
            calculate_lmdi (bool): Whether to calculate the LMDI decomposition

        Returns:
            results_dict (dict): LMDI decomposition results
        """
        data_dict = self.collect_data()
        results_dict, formatted_results = \
            self.get_nested_lmdi(level_of_aggregation=self.level_of_aggregation,
                                 breakout=breakout,
                                 calculate_lmdi=calculate_lmdi,
                                 raw_data=data_dict,
                                 lmdi_type='LMDI-I')
        return results_dict
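

# --- Illustrative sketch (an addition, not part of the original module) ---
# collect_data above calls self.nominal_energy_intensity, which is inherited
# from CalculateLMDI and defined elsewhere. Assuming nominal intensity reduces
# to energy use divided by total floorspace (as the call signature suggests),
# a minimal stand-alone version looks like this; the toy numbers and column
# names are placeholders.
def _sketch_nominal_energy_intensity():
    import pandas as pd
    years = [2016, 2017]
    # Energy consumption for one energy type (toy values, Trillion Btu)
    energy = pd.Series([470.0, 480.0], index=years, name='elec')
    # Occupied floorspace by housing type (toy values)
    floorspace = pd.DataFrame({'Single-Family': [90.0, 91.0],
                               'Multi-Family': [30.0, 31.0],
                               'Manufactured-Homes': [8.0, 8.0]}, index=years)
    total_floorspace = floorspace.sum(axis=1)
    # Nominal (not weather-adjusted) intensity: energy per unit of floorspace
    return energy.divide(total_floorspace)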
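

# Hypothetical entry point, included for illustration only; the original file
# does not show one. The paths are placeholders, and level_of_aggregation
# mirrors the 'National' key used in sub_categories_list above.
if __name__ == '__main__':
    indicators = ResidentialIndicators(directory='./Data',
                                       output_directory='./Results',
                                       level_of_aggregation='National')
    results = indicators.main(breakout=True, calculate_lmdi=True)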