import numpy as np
import pandas as pd

# Project-internal names used below (cfg, util, TimeSeries, GeoMapper, and plt
# for plotting) are assumed to be imported elsewhere in the package.


def clean_timeseries(self, attr='values', inplace=True, time_index_name='year',
                     time_index=None, lower=0, upper=None,
                     interpolation_method='missing', extrapolation_method='missing'):
    if time_index is None:
        time_index = cfg.years
    # 'missing' is a sentinel meaning "fall back to the method configured on self";
    # compare strings with ==, not `is`, which tests identity and is unreliable here
    interpolation_method = self.interpolation_method if interpolation_method == 'missing' else interpolation_method
    extrapolation_method = self.extrapolation_method if extrapolation_method == 'missing' else extrapolation_method
    exp_growth_rate = self.extrapolation_growth if hasattr(self, 'extrapolation_growth') else None
    data = getattr(self, attr)
    clean_data = TimeSeries.clean(data=data, newindex=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method,
                                  exp_growth_rate=exp_growth_rate).clip(lower=lower, upper=upper)
    if inplace:
        setattr(self, attr, clean_data)
    else:
        return clean_data
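# A minimal usage sketch of the TimeSeries.clean call above, assuming TimeSeries
# is in scope; the toy years and values are hypothetical, but the keyword
# arguments mirror clean_timeseries.
raw = pd.DataFrame([0.8, 0.7, 0.4], index=pd.Index([2015, 2018, 2020], name='year'))
cleaned = TimeSeries.clean(data=raw, newindex=np.arange(2012, 2031),
                           time_index_name='year',
                           interpolation_method='linear_interpolation',
                           extrapolation_method='nearest').clip(lower=0)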
def run_all_cleaning_methods(self, x, y, newindex):
    for method in self.methods:
        data = pd.DataFrame(y, index=x)
        newdata = TimeSeries.clean(data, newindex=newindex,
                                   # decay_towards_linear_regression is not supported as an interpolation method
                                   interpolation_method=(None if method == 'decay_towards_linear_regression' else method),
                                   extrapolation_method=method)
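# Hedged sketch of driving the harness above on toy data; the method names are
# assumptions drawn from strings that appear elsewhere in this file.
for method in ['linear_interpolation', 'nearest']:
    toy = pd.DataFrame(np.array([.8, .7, .4]), index=np.array([2015, 2018, 2020]))
    TimeSeries.clean(toy, newindex=np.arange(2012, 2025),
                     interpolation_method=method, extrapolation_method=method)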
def run_all_cleaning_methods(self, x, y, newindex):
    # debugging variant: prints each method and plots the cleaned series against the input
    for method in self.methods:
        print(method)
        print(x, y)
        data = pd.DataFrame(y, index=x)
        newdata = TimeSeries.clean(data, newindex=newindex, interpolation_method=method)
        plt.plot(newdata.index, newdata[0])
        plt.plot(x, y, '.')
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography,
                                               impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need a clean timeseries, then we allocate the foreign gau out and subtract it
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)

    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)

    # do the allocation, take the ratio of foreign to native, clean the timeseries,
    # then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key,
                             primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice),
                                                   fill_value=np.nan, non_expandable_levels=[])
    # zero out the ratio where the impacted gaus are zero to avoid divide-by-zero artifacts
    ratio_allocated_to_impacted.iloc[np.nonzero(impacted_gaus_slice.values == 0)] = 0
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v,
                                   interpolation_method='linear_interpolation',
                                   extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice),
                                                             fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years,
                                                                foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years,
                                                                    current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(
        current_geography, level=foreign_geography)

    # update foreign GAUs after the clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    allocated_foreign_gau_slice_foreign_geo = allocated_foreign_gau_slice_foreign_geo.reorder_levels(
        df.index.names).sort_index()
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo,
                                          [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]

    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo),
                                         fill_value=np.nan, non_expandable_levels=[])
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort_index()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. '
                             'This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    if new_impacted_gaus.isnull().all().all():
        raise ValueError('Years or vintages did not overlap between the foreign gaus and impacted gaus')

    # update native GAUs after netting out the foreign gau
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v],
                                          [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
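# Illustrative sketch of the allocate/ratio/clean/reconstitute pattern above on
# plain single-level frames (the real method operates on MultiIndex slices);
# the numbers are made up.
foreign = pd.DataFrame([10.0, 12.0], index=pd.Index([2015, 2020], name='year'))
native = pd.DataFrame([100.0, 110.0, 125.0], index=pd.Index([2015, 2020, 2025], name='year'))
ratio = foreign / native                  # NaN wherever the years don't overlap
ratio[native == 0] = 0                    # guard against divide-by-zero artifacts
clean_ratio = TimeSeries.clean(data=ratio, time_index_name='year',
                               interpolation_method='linear_interpolation',
                               extrapolation_method='nearest')
reconstituted = clean_ratio * native      # foreign-gau data over all native years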
                                                         target=rio_emissions_fe, earliest_year=2040)
ep_emissions = pd.concat([
    ep_emissions_fe.reset_index(),
    ep_emissions[ep_emissions.index.get_level_values('FINAL_ENERGY') != ep_fe].reset_index()
]).set_index(ep_emissions.index.names).sort_index()

print('\n scaling for all emissions')
ep_emissions = ep_emissions.groupby(level=['TIMESTAMP', 'SCENARIO', 'YEAR']).apply(
    scale, target=remove_df_levels(rio_emissions, 'product fuel'), earliest_year=2020)

# interpolate between all years
print('interpolating emissions')
ep_emissions = reindex_df_level_with_new_elements(ep_emissions, 'YEAR', rio_emissions_years).fillna(0)
ep_emissions = ep_emissions.reset_index().set_index(ep_emissions.index.names).sort_index()
ep_emissions = TimeSeries.clean(ep_emissions, newindex=years, time_index_name='YEAR',
                                interpolation_method='linear_interpolation')

print('saving scaled emissions')
ep_emissions.to_csv(r"D:\Dropbox (EER)\Evolved Energy Research\Projects & Marketing\Princeton University\Ryan's output template\combined_outputs\c_emissions_scaled.csv")
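# reindex_df_level_with_new_elements is a project helper whose implementation is
# not shown here; a plain-pandas sketch of the same idea on a hypothetical
# single-level frame:
frame = pd.DataFrame({'value': [1.0, 3.0]}, index=pd.Index([2020, 2025], name='YEAR'))
frame = frame.reindex(pd.Index(range(2020, 2031), name='YEAR')).fillna(0)
# TimeSeries.clean then interpolates linearly across the full YEAR index, as above.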
# scratch tests for TimeSeries.clean
# newindex = np.arange(2015, 2025)
newindex = np.arange(2012, 2017)
x = np.array([2015, 2018, 2020])
y = np.array([.8, .7, .4])
data = pd.DataFrame(y, index=x)
newdata = TimeSeries.clean(data, newindex=newindex,
                           interpolation_method='linear_interpolation',
                           extrapolation_method='nearest')

# newindex = np.arange(2020, 2025)
# multi_data = pd.concat([data]*3, keys=['a', 'b', 'c'], names=['dummy', 'year'])
# newdata2 = TimeSeries.clean(multi_data, time_index_name='year', newindex=newindex,
#                             interpolation_method='linear_interpolation', extrapolation_method='nearest')
newindex = np.arange(2015, 2050)
multi_data = pd.concat([data] * 3, keys=['a', 'b', 'c'], names=['dummy', 'year'])
newdata2 = TimeSeries.clean(multi_data, time_index_name='year', newindex=newindex,
                            interpolation_method='nearest', extrapolation_method='exponential')

# raw_values = pd.read_csv('raw_values_example_for_clean_timeseries.csv')
# raw_values.set_index(['us', 'efficiency_type', 'supply_node', 'year'], inplace=True)
# raw_values.sort_index(inplace=True)
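# clean_timeseries above also threads an optional exp_growth_rate through to
# TimeSeries.clean; a hedged sketch of exponential extrapolation with an assumed
# 2%-per-year growth rate (the value and its interpretation are assumptions):
newdata3 = TimeSeries.clean(multi_data, time_index_name='year', newindex=newindex,
                            interpolation_method='linear_interpolation',
                            extrapolation_method='exponential', exp_growth_rate=0.02)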