def calculate_tco(self):
    cost_unit = cfg.cfgfile.get('case', 'currency_year_id') + " " + cfg.cfgfile.get('case', 'currency_name')
    initial_vintage = min(cfg.supply_years)
    supply_side_df = self.demand.outputs.demand_embodied_energy_costs_tco
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage') >= initial_vintage]
    demand_side_df = self.demand.d_levelized_costs_tco
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage') >= initial_vintage]
    service_demand_df = self.demand.d_service_demand_tco
    service_demand_df = service_demand_df[service_demand_df.index.get_level_values('vintage') >= initial_vintage]
    keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['COST TYPE']
    self.outputs.c_tco = pd.concat([util.DfOper.divi([supply_side_df, util.remove_df_levels(service_demand_df, 'unit')]),
                                    util.DfOper.divi([demand_side_df, util.remove_df_levels(service_demand_df, 'unit')])],
                                   keys=keys, names=names)
    self.outputs.c_tco = self.outputs.c_tco.replace([np.inf, np.nan], 0)
    self.outputs.c_tco[self.outputs.c_tco < 0] = 0
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector, 'service_demand') and hasattr(subsector, 'stock'):
                indexer = util.level_specific_indexer(self.outputs.c_tco, 'subsector', subsector.id)
                self.outputs.c_tco.loc[indexer, 'unit'] = subsector.service_demand.unit.upper()
    self.outputs.c_tco = self.outputs.c_tco.set_index('unit', append=True)
    self.outputs.c_tco.columns = [cost_unit.upper()]
    self.outputs.c_tco = self.outputs.c_tco[self.outputs.c_tco[cost_unit.upper()] != 0]
    self.outputs.c_tco = self.outputs.return_cleaned_output('c_tco')
def calculate_tco(self):
    cost_unit = cfg.getParam('currency_year') + " " + cfg.getParam('currency_name')
    initial_vintage = min(cfg.supply_years)
    supply_side_df = self.demand.outputs.demand_embodied_energy_costs_tco
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage') >= initial_vintage]
    demand_side_df = self.demand.d_levelized_costs_tco
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage') >= initial_vintage]
    service_demand_df = self.demand.d_service_demand_tco
    service_demand_df = service_demand_df[service_demand_df.index.get_level_values('vintage') >= initial_vintage]
    keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['COST TYPE']
    self.outputs.c_tco = pd.concat([util.DfOper.divi([supply_side_df, util.remove_df_levels(service_demand_df, 'unit')]),
                                    util.DfOper.divi([demand_side_df, util.remove_df_levels(service_demand_df, 'unit')])],
                                   keys=keys, names=names)
    self.outputs.c_tco = self.outputs.c_tco.replace([np.inf, np.nan], 0)
    self.outputs.c_tco[self.outputs.c_tco < 0] = 0
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector, 'service_demand') and hasattr(subsector, 'stock'):
                indexer = util.level_specific_indexer(self.outputs.c_tco, 'subsector', subsector.id)
                self.outputs.c_tco.loc[indexer, 'unit'] = subsector.service_demand.unit.upper()
    self.outputs.c_tco = self.outputs.c_tco.set_index('unit', append=True)
    self.outputs.c_tco.columns = [cost_unit.upper()]
    self.outputs.c_tco = self.outputs.c_tco[self.outputs.c_tco[cost_unit.upper()] != 0]
    self.outputs.c_tco = self.outputs.return_cleaned_output('c_tco')
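# Illustrative sketch (not part of the codebase): the TCO assembly above divides
# supply-side and demand-side cost frames by service demand, then stacks the two
# results under a new 'COST TYPE' index level. Plain pandas division stands in
# for util.DfOper.divi under the assumption of index-aligned frames; the data
# and level names below are invented.
import pandas as pd

idx = pd.MultiIndex.from_product([['car'], [2020, 2021]], names=['subsector', 'vintage'])
supply_side = pd.DataFrame({'value': [100.0, 110.0]}, index=idx)
demand_side = pd.DataFrame({'value': [40.0, 42.0]}, index=idx)
service_demand = pd.DataFrame({'value': [10.0, 10.0]}, index=idx)

tco = pd.concat([supply_side / service_demand,    # per-unit supply-side cost
                 demand_side / service_demand],   # per-unit demand-side cost
                keys=['SUPPLY-SIDE', 'DEMAND-SIDE'], names=['COST TYPE'])
print(tco)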
def set_opt_bulk_net_loads(self, bulk_load, bulk_gen, dispatched_bulk_load, bulk_net_load, active_thermal_dispatch_df):
    bulk_load = self._convert_weather_datetime_to_hour(bulk_load)
    bulk_gen = self._convert_weather_datetime_to_hour(bulk_gen)
    dispatched_bulk_load = self._convert_weather_datetime_to_hour(dispatched_bulk_load)
    bulk_net_load = util.remove_df_levels(util.df_slice(bulk_net_load, 2, 'timeshift_type', drop_level=True), 'year')
    bulk_net_load = self._convert_ld_weather_datetime_to_hour(bulk_net_load)
    self.bulk_load = self._timeseries_to_dict(bulk_load)
    self.dispatched_bulk_load = self._timeseries_to_dict(dispatched_bulk_load)
    self.bulk_gen = self._timeseries_to_dict(bulk_gen)
    thermal_unstacked = active_thermal_dispatch_df.squeeze().unstack('IO')
    # this includes must-run generation
    must_run_sum = thermal_unstacked[thermal_unstacked['must_run'] == 1]['capacity'].groupby(level=cfg.dispatch_geography).sum().to_frame()
    self.ld_bulk_net_load_df = util.DfOper.subt((util.remove_df_levels(bulk_net_load, 'period').to_frame(), must_run_sum))
    self.ld_bulk_net_load = self.ld_bulk_net_load_df.squeeze().to_dict()
def set_opt_bulk_net_loads(self, bulk_load, bulk_gen, dispatched_bulk_load, bulk_net_load, active_thermal_dispatch_df):
    bulk_load = self._convert_weather_datetime_to_hour(bulk_load)
    bulk_gen = self._convert_weather_datetime_to_hour(bulk_gen)
    dispatched_bulk_load = self._convert_weather_datetime_to_hour(dispatched_bulk_load)
    bulk_net_load = util.remove_df_levels(bulk_net_load, 'year')
    bulk_net_load = self._convert_ld_weather_datetime_to_hour(bulk_net_load)
    self.bulk_load = self._timeseries_to_dict(bulk_load)
    self.dispatched_bulk_load = self._timeseries_to_dict(dispatched_bulk_load)
    self.bulk_gen = self._timeseries_to_dict(bulk_gen)
    thermal_unstacked = active_thermal_dispatch_df.squeeze().unstack('IO')
    # this includes must-run generation
    must_run_sum = thermal_unstacked[thermal_unstacked['must_run'] == 1]['capacity'].groupby(level=GeoMapper.dispatch_geography).sum().to_frame()
    self.ld_bulk_net_load_df = util.DfOper.subt((util.remove_df_levels(bulk_net_load, 'period').to_frame(), must_run_sum))
    self.ld_bulk_net_load = self.ld_bulk_net_load_df.squeeze().to_dict()
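# Illustrative sketch (invented data): the must-run aggregation above unstacks the
# 'IO' level into columns, keeps rows flagged must_run == 1, and sums capacity by
# geography, mirroring the thermal_unstacked/must_run_sum steps.
import pandas as pd

idx = pd.MultiIndex.from_product(
    [['east', 'west'], ['plant1', 'plant2'], ['capacity', 'must_run']],
    names=['geography', 'generator', 'IO'])
thermal = pd.Series([50.0, 1, 30.0, 0, 20.0, 1, 10.0, 1], index=idx)

unstacked = thermal.unstack('IO')
must_run_sum = (unstacked[unstacked['must_run'] == 1]['capacity']
                .groupby(level='geography').sum().to_frame())
print(must_run_sum)  # east: 50.0 (plant1 only), west: 30.0 (plant1 + plant2)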
def calc_and_format_export_emissions(self):
    # calculate and format export emissions
    if self.supply.export_emissions is None:
        return None
    export_emissions = GeoMapper.geo_map(self.supply.export_emissions.copy(), GeoMapper.supply_primary_geography,
                                         GeoMapper.combined_outputs_geography, 'total')
    if 'supply_geography' not in cfg.output_combined_levels:
        export_emissions = util.remove_df_levels(export_emissions, GeoMapper.supply_primary_geography + '_supply')
    export_emissions = Output.clean_df(export_emissions)
    util.replace_index_name(export_emissions, 'FINAL_ENERGY', 'SUPPLY_NODE_EXPORT')
    index_names = export_emissions.index.names
    export_emissions = export_emissions.reset_index()
    export_emissions['FINAL_ENERGY'] = 'export ' + export_emissions['FINAL_ENERGY']
    export_emissions = export_emissions.set_index(index_names).sort_index()
    export_emissions = util.add_to_df_index(export_emissions, names=['EXPORT/DOMESTIC', "SUPPLY/DEMAND"], keys=["EXPORT", "SUPPLY"])
    return export_emissions
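# Illustrative sketch: util.add_to_df_index is assumed to behave like repeated
# pd.concat(..., keys=..., names=...) calls, stacking constant index levels onto a
# frame. The toy frame below shows the effect with the same keys/names as above.
import pandas as pd

df = pd.DataFrame({'value': [1.0, 2.0]}, index=pd.Index(['a', 'b'], name='node'))
for key, name in zip(['EXPORT', 'SUPPLY'], ['EXPORT/DOMESTIC', 'SUPPLY/DEMAND']):
    df = pd.concat([df], keys=[key], names=[name])
print(df.index.names)  # ['SUPPLY/DEMAND', 'EXPORT/DOMESTIC', 'node']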
def calculate_d_payback_energy(self):
    initial_vintage = min(cfg.supply_years)
    demand_side_df = self.demand.d_all_energy_demand_payback
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage') >= initial_vintage]
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('year') >= initial_vintage]
    sales_df = copy.deepcopy(self.demand.outputs.d_sales)
    util.replace_index_name(sales_df, 'vintage', 'year')
    sales_df = sales_df[sales_df.index.get_level_values('vintage') >= initial_vintage]
    sales_df = util.add_and_set_index(sales_df, 'year', cfg.supply_years)
    # sales_df.index = sales_df.index.reorder_levels(demand_side_df.index.names)
    # sales_df = sales_df.reindex(demand_side_df.index).sort_index()
    self.demand.outputs.d_payback_energy = util.DfOper.divi([demand_side_df, sales_df])
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy[np.isfinite(self.demand.outputs.d_payback_energy.values)]
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.replace([np.inf, np.nan], 0)
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector, 'stock') and subsector.sub_type != 'link':
                indexer = util.level_specific_indexer(self.demand.outputs.d_payback_energy, 'subsector', subsector.id)
                self.demand.outputs.d_payback_energy.loc[indexer, 'unit'] = subsector.stock.unit.upper()
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.set_index('unit', append=True)
    self.demand.outputs.d_payback_energy.columns = [cfg.calculation_energy_unit.upper()]
    self.demand.outputs.d_payback_energy['lifetime_year'] = self.demand.outputs.d_payback_energy.index.get_level_values('year') - self.demand.outputs.d_payback_energy.index.get_level_values('vintage') + 1
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.set_index('lifetime_year', append=True)
    self.demand.outputs.d_payback_energy = util.remove_df_levels(self.demand.outputs.d_payback_energy, 'year')
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy.groupby(level=[x for x in self.demand.outputs.d_payback_energy.index.names if x != 'lifetime_year']).transform(lambda x: x.cumsum())
    self.demand.outputs.d_payback_energy = self.demand.outputs.d_payback_energy[self.demand.outputs.d_payback_energy[cfg.calculation_energy_unit.upper()] != 0]
    self.demand.outputs.d_payback_energy = self.demand.outputs.return_cleaned_output('d_payback_energy')
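# Illustrative sketch (invented data): the payback accumulation above derives
# lifetime_year as year - vintage + 1, drops the year level, then cumulatively
# sums within every other index level so values accrue over a unit's lifetime.
import pandas as pd

idx = pd.MultiIndex.from_product([[2020], [2020, 2021, 2022]], names=['vintage', 'year'])
df = pd.DataFrame({'value': [5.0, 5.0, 5.0]}, index=idx)

df['lifetime_year'] = df.index.get_level_values('year') - df.index.get_level_values('vintage') + 1
df = df.set_index('lifetime_year', append=True)
df = df.reset_index('year', drop=True)  # stand-in for util.remove_df_levels(df, 'year')
others = [n for n in df.index.names if n != 'lifetime_year']
df['value'] = df.groupby(level=others)['value'].cumsum()
print(df)  # 5.0, 10.0, 15.0 across lifetime years 1..3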
def __init__(self, id, subsector_id, sql_id_table, sql_data_table, primary_key, data_id_key, reference=False, scenario=None):
    self.id = id
    self.subsector_id = subsector_id
    self.sql_id_table = sql_id_table
    self.sql_data_table = sql_data_table
    self.scenario = scenario
    self.mapped = False
    if reference:
        for col, att in util.object_att_from_table(self.sql_id_table, self.subsector_id, primary_key):
            if att is not None:
                setattr(self, col, att)
        DataMapFunctions.__init__(self, data_id_key)
        self.read_timeseries_data(subsector_id=self.subsector_id)
        self.raw_values = util.remove_df_levels(self.raw_values, 'technology')
    else:
        self.replaced_demand_tech_id = None
        # measure-specific sales share does not require technology filtering
        Abstract.__init__(self, self.id, primary_key=primary_key, data_id_key=data_id_key)
def _validate_gaus(self):
    dispatch_geographies = set(GeoMapper.dispatch_geographies)
    geography_from_names = self.raw_values.index.get_level_values('gau_from')
    if len(set(geography_from_names) - dispatch_geographies):
        raise ValueError("gau_from_names {} are found in transmission constraint name {} "
                         "but not found in the dispatch_geographies {}".format(
                             list(set(geography_from_names) - dispatch_geographies), self.name, GeoMapper.dispatch_geographies))
    geography_to_names = self.raw_values.index.get_level_values('gau_to')
    if len(set(geography_to_names) - dispatch_geographies):
        raise ValueError("gau_to_names {} are found in transmission constraint name {} "
                         "but not found in the dispatch_geographies {}".format(
                             list(set(geography_to_names) - dispatch_geographies), self.name, GeoMapper.dispatch_geographies))
    if any([name in self.raw_values.index.names for name in ('month', 'hour', 'day_type_name')]):
        print('Time slices for transmission constraints are not implemented yet; the average of all combinations will be used')
        self.raw_values = util.remove_df_levels(
            self.raw_values,
            [name for name in ('month', 'hour', 'day_type_name') if name in self.raw_values.index.names],
            agg_function='mean')
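# Illustrative sketch: remove_df_levels with agg_function='mean' is assumed to
# collapse the named index levels by averaging over them; a plain-pandas
# equivalent on an invented two-month constraint frame:
import pandas as pd

idx = pd.MultiIndex.from_product([['east'], ['west'], [1, 2]],
                                 names=['gau_from', 'gau_to', 'month'])
constraint = pd.DataFrame({'value': [0.8, 0.6]}, index=idx)

kept = [n for n in constraint.index.names if n != 'month']
collapsed = constraint.groupby(level=kept).mean()
print(collapsed)  # value 0.7; the month level is averaged away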
def __init__(self, id, supply_node_id, sql_id_table, sql_data_table, primary_key, data_id_key, reference=False, scenario=None):
    self.id = id
    self.input_type = 'total'
    self.supply_node_id = supply_node_id
    self.sql_id_table = sql_id_table
    self.sql_data_table = sql_data_table
    self.scenario = scenario
    self.mapped = False
    if reference:
        for col, att in util.object_att_from_table(self.sql_id_table, self.supply_node_id, primary_key):
            setattr(self, col, att)
        DataMapFunctions.__init__(self, data_id_key)
        self.read_timeseries_data(supply_node_id=self.supply_node_id)
        self.raw_values = util.remove_df_levels(self.raw_values, 'supply_technology')
    else:
        # measure-specific sales does not require technology filtering
        Abstract.__init__(self, self.id, primary_key=primary_key, data_id_key=data_id_key)
def __init__(self, id, supply_node_id, sql_id_table, sql_data_table, reference=False):
    self.id = id
    self.supply_node_id = supply_node_id
    self.sql_id_table = sql_id_table
    self.sql_data_table = sql_data_table
    self.mapped = False
    self.input_type = 'intensity'
    if reference:
        for col, att in util.object_att_from_table(self.sql_id_table, self.supply_node_id, 'supply_node_id'):
            if att is not None:
                setattr(self, col, att)
        DataMapFunctions.__init__(self, 'supply_technology')
        self.read_timeseries_data()
        self.raw_values = util.remove_df_levels(self.raw_values, ['supply_node', 'supply_technology'])
    else:
        # measure-specific sales share does not require technology filtering
        Abstract.__init__(self, self.id)
def convert(self):
    """Return values from raw_values that are converted to units consistent with output units - energy and annual."""
    if self.definition == 'absolute':
        if self.time_unit is None:
            self.time_unit = 'year'
        self.values = util.unit_convert(self.raw_values, unit_from_num=self.energy_unit, unit_from_den=self.time_unit,
                                        unit_to_num=cfg.calculation_energy_unit, unit_to_den='year')
        self.values = util.remove_df_levels(self.values, cfg.removed_demand_levels, agg_function='mean')
        if self.demand_tech_unit_type == 'service demand':
            self.values = util.unit_convert(self.values, unit_from_num=self.unit, unit_to_num=self.service_demand_unit)
        self.absolute = True
    else:
        self.values = self.raw_values.copy()
        self.absolute = False
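# Illustrative sketch: a toy stand-in for util.unit_convert on energy data, scaling
# by the ratio of source and target energy units (per-year on both sides, so the
# time denominator cancels). The helper name and factor table are invented.
JOULES_PER_UNIT = {'gigajoule': 1e9, 'megawatt_hour': 3.6e9}

def convert_energy(values, unit_from_num, unit_to_num):
    return values * (JOULES_PER_UNIT[unit_from_num] / JOULES_PER_UNIT[unit_to_num])

print(convert_energy(1.0, 'megawatt_hour', 'gigajoule'))  # 3.6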
def calculate(self, vintages, years):
    self.vintages = vintages
    self.years = years
    self.remap(time_index_name='vintage')
    self.values = util.remove_df_levels(self.values, cfg.removed_demand_levels, agg_function='mean')
def geomap_to_primary_geography(self, attr='values', inplace=True):
    """Map the dataframe to the primary geography."""
    geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if not hasattr(self, 'geography_map_key') else self.geography_map_key
    self.map_df_primary = cfg.geo.map_df(self.geography, cfg.primary_geography, normalize_as=self.input_type, map_key=geography_map_key)
    mapped_data = util.DfOper.mult((getattr(self, attr), self.map_df_primary), fill_value=None)
    if self.geography != cfg.primary_geography and self.geography != 'time zone':
        mapped_data = util.remove_df_levels(mapped_data, self.geography)
        # levels = [ind for ind in mapped_data.index.names if ((ind == self.geography and self.geography != cfg.primary_geography) or ind == 'time zone')]
        # mapped_data = mapped_data.groupby(level=levels).sum()
    mapped_data = mapped_data.swaplevel('weather_datetime', -1)
    if inplace:
        setattr(self, attr, mapped_data.sort())
        self.geography_check = (cfg.primary_geography_id, tuple(sorted(cfg.primary_subset_id)), tuple(cfg.breakout_geography_id))
    else:
        return mapped_data.sort()
def ensure_correct_geography(self, map_to, converted_geography, current_geography=None, current_data_type=None):
    current_data_type = copy.copy(self.input_type) if current_data_type is None else current_data_type
    mapt = getattr(self, map_to)
    mapt_level_names = mapt.index.names if mapt.index.nlevels > 1 else [mapt.index.name]
    if converted_geography in mapt_level_names:
        # we have picked up the converted_geography geography
        if (current_geography in mapt_level_names) and (current_geography != converted_geography):
            # our starting geography is still in the dataframe and is not equal to our converted_geography, remove it
            setattr(self, map_to, util.remove_df_levels(getattr(self, map_to), current_geography))
    else:
        # we still need to do a geomap because mapping to a driver didn't give us our converted_geography
        self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography, current_data_type=current_data_type)
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need to do a clean time series, then we need to allocate out and subtract
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
    # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    ratio_allocated_to_impacted.iloc[np.nonzero(impacted_gaus_slice.values == 0)] = 0
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)
    # update foreign GAUs after clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    allocated_foreign_gau_slice_foreign_geo = allocated_foreign_gau_slice_foreign_geo.reorder_levels(df.index.names).sort()
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]
    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    if new_impacted_gaus.isnull().all().value:
        raise ValueError('Years or vintages did not overlap between the foreign gaus and impacted gaus')
    # update native GAUs after netting out foreign gaus
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
def calculate_combined_emissions_results(self):
    # calculate and format export emissions
    if self.supply.export_emissions is not None:
        setattr(self.outputs, 'export_emissions', self.supply.export_emissions)
        if 'supply_geography' not in cfg.output_combined_levels:
            self.outputs.export_emissions = util.remove_df_levels(self.outputs.export_emissions, cfg.primary_geography + '_supply')
        self.export_emissions_df = self.outputs.return_cleaned_output('export_emissions')
        del self.outputs.export_emissions
        util.replace_index_name(self.export_emissions_df, 'FINAL_ENERGY', 'SUPPLY_NODE_EXPORT')
        keys = ["EXPORT", "SUPPLY"]
        names = ['EXPORT/DOMESTIC', "SUPPLY/DEMAND"]
        for key, name in zip(keys, names):
            self.export_emissions_df = pd.concat([self.export_emissions_df], keys=[key], names=[name])
    else:
        self.export_emissions_df = None
    # calculate and format embodied supply emissions
    self.embodied_emissions_df = self.demand.outputs.return_cleaned_output('demand_embodied_emissions')
    keys = ["DOMESTIC", "SUPPLY"]
    names = ['EXPORT/DOMESTIC', "SUPPLY/DEMAND"]
    for key, name in zip(keys, names):
        self.embodied_emissions_df = pd.concat([self.embodied_emissions_df], keys=[key], names=[name])
    # calculate and format direct demand emissions
    self.direct_emissions_df = self.demand.outputs.return_cleaned_output('demand_direct_emissions')
    keys = ["DOMESTIC", "DEMAND"]
    names = ['EXPORT/DOMESTIC', "SUPPLY/DEMAND", cfg.primary_geography.upper() + '_EMITTED']
    for key, name in zip(keys, names):
        self.direct_emissions_df = pd.concat([self.direct_emissions_df], keys=[key], names=[name])
    if cfg.primary_geography + '_supply' in cfg.output_combined_levels:
        keys = self.direct_emissions_df.index.get_level_values(cfg.primary_geography.upper()).values
        names = cfg.primary_geography.upper() + '_SUPPLY'
        self.direct_emissions_df[names] = keys
        self.direct_emissions_df.set_index(names, append=True, inplace=True)
    keys = ['EXPORTED', 'SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['EMISSIONS TYPE']
    self.outputs.c_emissions = util.df_list_concatenate([self.export_emissions_df, self.embodied_emissions_df, self.direct_emissions_df], keys=keys, new_names=names)
    util.replace_index_name(self.outputs.c_emissions, cfg.primary_geography.upper() + '-EMITTED', cfg.primary_geography.upper() + '_SUPPLY')
    util.replace_index_name(self.outputs.c_emissions, cfg.primary_geography.upper() + '-CONSUMED', cfg.primary_geography.upper())
    self.outputs.c_emissions = self.outputs.c_emissions[self.outputs.c_emissions['VALUE'] != 0]
    emissions_unit = cfg.cfgfile.get('case', 'mass_unit')
    self.outputs.c_emissions.columns = [emissions_unit.upper()]
def calculate_combined_emissions_results(self):
    # calculate and format export emissions
    if self.supply.export_emissions is not None:
        setattr(self.outputs, 'export_emissions', self.supply.export_emissions)
        if 'supply_geography' not in cfg.output_combined_levels:
            self.outputs.export_emissions = util.remove_df_levels(self.outputs.export_emissions, self.geography + '_supply')
        self.export_emissions_df = self.outputs.return_cleaned_output('export_emissions')
        del self.outputs.export_emissions
        util.replace_index_name(self.export_emissions_df, 'FINAL_ENERGY', 'SUPPLY_NODE_EXPORT')
        keys = ["EXPORT", "SUPPLY"]
        names = ['EXPORT/DOMESTIC', "SUPPLY/DEMAND"]
        for key, name in zip(keys, names):
            self.export_emissions_df = pd.concat([self.export_emissions_df], keys=[key], names=[name])
    else:
        self.export_emissions_df = None
    # calculate and format embodied supply emissions
    self.embodied_emissions_df = self.demand.outputs.return_cleaned_output('demand_embodied_emissions')
    keys = ["DOMESTIC", "SUPPLY"]
    names = ['EXPORT/DOMESTIC', "SUPPLY/DEMAND"]
    for key, name in zip(keys, names):
        self.embodied_emissions_df = pd.concat([self.embodied_emissions_df], keys=[key], names=[name])
    # calculate and format direct demand emissions
    self.direct_emissions_df = self.demand.outputs.return_cleaned_output('demand_direct_emissions')
    keys = ["DOMESTIC", "DEMAND"]
    names = ['EXPORT/DOMESTIC', "SUPPLY/DEMAND"]
    for key, name in zip(keys, names):
        self.direct_emissions_df = pd.concat([self.direct_emissions_df], keys=[key], names=[name])
    if 'supply_geography' in cfg.output_combined_levels:
        keys = self.direct_emissions_df.index.get_level_values(self.geography.upper()).values
        names = self.geography.upper() + '_SUPPLY'
        self.direct_emissions_df[names] = keys
        self.direct_emissions_df.set_index(names, append=True, inplace=True)
    keys = ['EXPORTED', 'SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['EMISSIONS TYPE']
    self.outputs.emissions = util.df_list_concatenate([self.export_emissions_df, self.embodied_emissions_df, self.direct_emissions_df], keys=keys, new_names=names)
    util.replace_index_name(self.outputs.emissions, self.geography.upper() + '_EMITTED', self.geography.upper() + '_SUPPLY')
    util.replace_index_name(self.outputs.emissions, self.geography.upper() + '_CONSUMED', self.geography.upper())
    self.outputs.emissions = self.outputs.emissions[self.outputs.emissions['VALUE'] != 0]
    emissions_unit = cfg.cfgfile.get('case', 'mass_unit')
    self.outputs.emissions.columns = [emissions_unit.upper()]
def __init__(self, name, subsector, scenario=None):
    schema.DemandSalesShareMeasures.__init__(self, name=name, scenario=scenario)
    self.init_from_db(name, scenario, subsector=subsector)
    self.primary_geography = GeoMapper.demand_primary_geography
    self.scenario = scenario
    self.mapped = False
    if self.raw_values is None:
        raise ValueError('error encountered in sales share measure ' + str(self.name))
    self.raw_values = util.remove_df_levels(self.raw_values, 'technology')
def add_rio_stock_measures(self, rio_inputs):
    self.specified_stocks = {}
    df = rio_inputs.stock
    if self.name in set(df.index.get_level_values('technology')):
        df = util.df_slice(df, [self.name], ['technology'])
        if np.any([isinstance(x, int) for x in df.index.get_level_values('resource_bin').values]):
            df = df[df.index.get_level_values('resource_bin') != 'n/a']
            df = df.groupby(level=df.index.names).sum()
            self.specified_stocks[1] = RioSpecifiedStock(df)
        else:
            self.specified_stocks[1] = RioSpecifiedStock(util.remove_df_levels(df, 'resource_bin'))
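# Illustrative sketch (invented data): the resource-bin handling above keeps the
# bin level only when it carries integer bin labels, dropping 'n/a' rows and
# re-aggregating duplicate index entries before the stock object is built.
import numpy as np
import pandas as pd

idx = pd.MultiIndex.from_tuples([('solar', 1), ('solar', 2), ('solar', 'n/a')],
                                names=['technology', 'resource_bin'])
df = pd.DataFrame({'value': [10.0, 20.0, 5.0]}, index=idx)

if np.any([isinstance(x, int) for x in df.index.get_level_values('resource_bin')]):
    df = df[df.index.get_level_values('resource_bin') != 'n/a']
    df = df.groupby(level=df.index.names).sum()
print(df)  # keeps bins 1 and 2; the 'n/a' row is gone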
def geo_map(self, converted_geography, attr='values', inplace=True, current_geography=None, current_data_type=None, fill_value=0.):
    """Map a dataframe to another geography using the relational GeographyMap database table.

    If the input type is a total, then the subsection is the geography to convert to and the
    supersection is the initial geography. Example:
        input_type = 'total'
        state --> census division: how much of the state of Maine is in census division New England?
        new england = subsection
        maine = supersection
    Otherwise the subsection and supersection values are reversed. Example:
        input_type = 'intensity'
        state --> census division: how much of census division New England does the state of Maine represent?
        maine = subsection
        new england = supersection
    """
    # unless specified, the input_type used is an attribute of the object
    current_data_type = self.input_type if current_data_type is None else current_data_type
    current_geography = self.geography if current_geography is None else current_geography
    geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if not hasattr(self, 'geography_map_key') else self.geography_map_key

    if current_geography == converted_geography:
        if inplace:
            return
        else:
            return getattr(self, attr)

    if current_data_type == 'total':
        subsection, supersection = converted_geography, current_geography
    elif current_data_type == 'intensity':
        subsection, supersection = current_geography, converted_geography
    else:
        raise ValueError('Input_type must be either "total" or "intensity"')

    # create dataframe with map from one geography to another
    map_df = cfg.geo.map_df(subsection, supersection, column=geography_map_key)
    # converted_gau = geo.geographies[converted_geography]
    # necessary to expand our dataframe over the new geography; keys and names set up a new dataframe level
    # expanded = pd.concat([getattr(self, attr)] * len(converted_gau), keys=converted_gau, names=(converted_geography,))
    mapped_data = DfOper.mult([getattr(self, attr), map_df], fill_value=fill_value)
    mapped_data = util.remove_df_levels(mapped_data, current_geography)
    if hasattr(mapped_data.index, 'swaplevel'):
        mapped_data = mapped_data.swaplevel(converted_geography, 0)
    mapped_data.sort(inplace=True)
    if inplace:
        setattr(self, attr, mapped_data)
        # setattr(self, 'geography', converted_geography)
    else:
        return mapped_data
def ensure_correct_geography(self, map_to, converted_geography, current_geography=None, current_data_type=None):
    current_data_type = copy.copy(self.input_type) if current_data_type is None else current_data_type
    mapt = getattr(self, map_to)
    mapt_level_names = mapt.index.names if mapt.index.nlevels > 1 else [mapt.index.name]
    if converted_geography in mapt_level_names:
        # we have picked up the converted_geography geography
        if (current_geography in mapt_level_names) and (current_geography != converted_geography):
            # our starting geography is still in the dataframe and is not equal to our converted_geography, remove it
            setattr(self, map_to, util.remove_df_levels(getattr(self, map_to), current_geography))
    else:
        # we still need to do a geomap because mapping to a driver didn't give us our converted_geography
        self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography, current_data_type=current_data_type)
def geo_map(self, converted_geography, attr='values', inplace=True, current_geography=None, current_data_type=None, fill_value=0.):
    """Map a dataframe to another geography using the relational GeographyMap database table.

    If the input type is a total, then the subsection is the geography to convert to and the
    supersection is the initial geography. Example:
        input_type = 'total'
        state --> census division: how much of the state of Maine is in census division New England?
        new england = subsection
        maine = supersection
    Otherwise the subsection and supersection values are reversed. Example:
        input_type = 'intensity'
        state --> census division: how much of census division New England does the state of Maine represent?
        maine = subsection
        new england = supersection
    """
    # unless specified, the input_type used is an attribute of the object
    current_data_type = self.input_type if current_data_type is None else current_data_type
    current_geography = self.geography if current_geography is None else current_geography
    geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if not hasattr(self, 'geography_map_key') else self.geography_map_key

    if current_geography == converted_geography:
        if inplace:
            return
        else:
            return getattr(self, attr)

    if current_data_type == 'total':
        subsection, supersection = converted_geography, current_geography
    elif current_data_type == 'intensity':
        subsection, supersection = current_geography, converted_geography
    else:
        raise ValueError('Input_type must be either "total" or "intensity"')

    # create dataframe with map from one geography to another
    map_df = cfg.geo.map_df(subsection, supersection, column=geography_map_key)
    # converted_gau = geo.geographies[converted_geography]
    # necessary to expand our dataframe over the new geography; keys and names set up a new dataframe level
    # expanded = pd.concat([getattr(self, attr)] * len(converted_gau), keys=converted_gau, names=(converted_geography,))
    mapped_data = DfOper.mult([getattr(self, attr), map_df], fill_value=fill_value)
    mapped_data = util.remove_df_levels(mapped_data, current_geography)
    mapped_data = mapped_data.swaplevel(converted_geography, 0)
    mapped_data.sort(inplace=True)
    if inplace:
        setattr(self, attr, mapped_data)
        # setattr(self, 'geography', converted_geography)
    else:
        return mapped_data
def geo_map(self, converted_geography, attr='values', inplace=True, current_geography=None, current_data_type=None, fill_value=0., filter_geo=True):
    """Map a dataframe to another geography using the relational GeographyMap database table.

    If the input type is a total, then the subsection is the geography to convert to and the
    supersection is the initial geography. Example:
        input_type = 'total'
        state --> census division: how much of the state of Maine is in census division New England?
        new england = subsection
        maine = supersection
    Otherwise the subsection and supersection values are reversed. Example:
        input_type = 'intensity'
        state --> census division: how much of census division New England does the state of Maine represent?
        maine = subsection
        new england = supersection
    """
    # unless specified, the input_type used is an attribute of the object
    current_data_type = self.input_type if current_data_type is None else current_data_type
    current_geography = self.geography if current_geography is None else current_geography
    geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if not hasattr(self, 'geography_map_key') else self.geography_map_key
    # create dataframe with map from one geography to another
    map_df = cfg.geo.map_df(current_geography, converted_geography, normalize_as=current_data_type, map_key=geography_map_key, filter_geo=filter_geo)
    mapped_data = DfOper.mult([getattr(self, attr), map_df], fill_value=fill_value)
    if current_geography != converted_geography:
        mapped_data = util.remove_df_levels(mapped_data, current_geography)
    if hasattr(mapped_data.index, 'swaplevel'):
        mapped_data = DataMapFunctions.reorder_df_geo_left_year_right(mapped_data, converted_geography)
    if inplace:
        setattr(self, attr, mapped_data.sort())
    else:
        return mapped_data.sort()
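# Illustrative sketch (invented shares): the multiply-then-collapse core of
# geo_map. A state-level total is spread across a normalized state-to-census-
# division share table, then the old geography level is summed out, which is the
# effect DfOper.mult followed by remove_df_levels has above.
import pandas as pd

data = pd.Series({'maine': 100.0, 'vermont': 50.0}, name='value')
data.index.name = 'state'

idx = pd.MultiIndex.from_tuples([('maine', 'new england'), ('vermont', 'new england')],
                                names=['state', 'census_division'])
map_df = pd.Series([1.0, 1.0], index=idx, name='share')  # fraction of each state in each division

mapped = map_df.mul(data, level='state')                   # expand over the new level
converted = mapped.groupby(level='census_division').sum()  # collapse the old one
print(converted)  # new england: 150.0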
def _validate_gaus(self):
    dispatch_geographies = set(cfg.dispatch_geographies)
    geography_from_ids = self.raw_values.index.get_level_values('geography_from')
    if len(set(geography_from_ids) - dispatch_geographies):
        raise ValueError("gau_from_ids {} are found in transmission constraint id {} "
                         "but not found in the dispatch_geographies {}".format(
                             list(set(geography_from_ids) - dispatch_geographies), self.id, cfg.dispatch_geographies))
    geography_to_ids = self.raw_values.index.get_level_values('geography_to')
    if len(set(geography_to_ids) - dispatch_geographies):
        raise ValueError("gau_to_ids {} are found in transmission constraint id {} "
                         "but not found in the dispatch_geographies {}".format(
                             list(set(geography_to_ids) - dispatch_geographies), self.id, cfg.dispatch_geographies))
    if any([name in self.raw_values.index.names for name in ('month', 'hour', 'day_type_id')]):
        print('Time slices for transmission constraints are not implemented yet; the average of all combinations will be used')
        self.raw_values = util.remove_df_levels(
            self.raw_values,
            [name for name in ('month', 'hour', 'day_type_id') if name in self.raw_values.index.names],
            agg_function='mean')
def geomap_to_primary_geography(self, attr='values', inplace=True):
    """Map the dataframe to the primary geography."""
    geography_map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if not hasattr(self, 'geography_map_key') else self.geography_map_key
    self.map_df_primary = cfg.geo.map_df(self.geography, cfg.primary_geography, normalize_as=self.input_type, map_key=geography_map_key)
    mapped_data = util.DfOper.mult((getattr(self, attr), self.map_df_primary), fill_value=None)
    if self.geography != cfg.primary_geography and self.geography != 'time zone':
        mapped_data = util.remove_df_levels(mapped_data, self.geography)
    mapped_data = mapped_data.swaplevel('weather_datetime', -1)
    if inplace:
        setattr(self, attr, mapped_data.sort())
    else:
        return mapped_data.sort()
def incorporate_foreign_gaus(self, df, current_geography, data_type, map_key, keep_oth_index_over_oth_gau=False, zero_out_negatives=True):
    native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(df, current_geography)
    # we don't have any foreign gaus
    if not foreign_gaus or not cfg.include_foreign_gaus:
        return df, current_geography

    y_or_v = GeoMapper._get_df_time_index_name(df)
    index_with_nans = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
    # if this is the case, we have two options: (1) ignore the foreign gau, (2) get rid of the other index
    if index_with_nans and (keep_oth_index_over_oth_gau or data_type == 'intensity'):
        return self.filter_foreign_gaus(df, current_geography), current_geography
    else:
        assert (y_or_v not in index_with_nans) and (current_geography not in index_with_nans)
        # we need to eliminate levels with nan before moving on
        df = util.remove_df_levels(df, index_with_nans)

    # add missing level indices for foreign gaus; this must be done before we fill in years because we use a fill value of zero
    df = self._add_missing_level_elements_to_foreign_gaus(df, current_geography)

    # we need all the index level combinations to have all years for this to work correctly
    df_no_foreign_gaus = self.filter_foreign_gaus(df, current_geography)
    df_years = sorted(list(set(df_no_foreign_gaus.index.get_level_values(y_or_v).values)))
    df = util.reindex_df_level_with_new_elements(df, y_or_v, df_years)

    base_gaus = np.array(self.values.index.get_level_values(current_geography), dtype=int)
    for foreign_gau in foreign_gaus:
        foreign_geography = self.gau_to_geography[foreign_gau]
        index = np.nonzero(self.values.index.get_level_values(self.gau_to_geography[foreign_gau]) == foreign_gau)[0]
        impacted_gaus = list(set(base_gaus[index]))
        base_gaus[index] = foreign_gau
        if any(impacted in foreign_gaus for impacted in impacted_gaus):
            raise ValueError('foreign gaus in the database cannot overlap geographically')
        # if the data_type is a total, we need to net out the total
        if data_type == 'total':
            df = self._update_dataframe_totals_after_foreign_gau(df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives)
        elif data_type == 'intensity':
            logging.debug('Foreign GAUs with intensities is not yet implemented, totals will not be conserved')

    assert not any([any(np.isnan(row)) for row in df.index.get_values()])
    new_geography_name = self.make_new_geography_name(current_geography, list(foreign_gaus))
    df.index = df.index.rename(new_geography_name, level=current_geography)
    if new_geography_name not in self.geographies:
        self.add_new_geography(new_geography_name, base_gaus)
    # df = GeoMapper.reorder_level_names_after_incorporating_foreign_gaus(df, new_geography_name, y_or_v)
    return df, new_geography_name
def __init__(self, id, supply_node_id, sql_id_table, sql_data_table, primary_key, data_id_key, reference=False):
    self.id = id
    self.input_type = 'total'
    self.supply_node_id = supply_node_id
    self.sql_id_table = sql_id_table
    self.sql_data_table = sql_data_table
    self.mapped = False
    if reference:
        for col, att in util.object_att_from_table(self.sql_id_table, self.supply_node_id, primary_key):
            setattr(self, col, att)
        DataMapFunctions.__init__(self, data_id_key)
        self.read_timeseries_data(supply_node_id=self.supply_node_id)
        self.raw_values = util.remove_df_levels(self.raw_values, 'supply_technology')
    else:
        # measure-specific sales does not require technology filtering
        Abstract.__init__(self, self.id, primary_key=primary_key, data_id_key=data_id_key)
def map_df(self, current_geography, converted_geography, normalize_as='total', map_key=None, reset_index=False,
           eliminate_zeros=True, primary_subset_id='from config', geomap_data='from self', filter_geo=True):
    """Main function that maps geographies to one another.

    Two options for two overlapping areas:
        (A u B) / A (A is supersection)
        (A u B) / B (B is supersection)

    Examples:
        self.map_df('households', subsection=('state'), supersection=('census division'))
            "what fraction of each census division is in each state"
        self.map_df('households', subsection=('census division'), supersection=('state'))
            "what fraction of each state is in each census division"
    """
    assert normalize_as == 'total' or normalize_as == 'intensity'
    geomap_data = self.values if geomap_data == 'from self' else geomap_data
    if primary_subset_id == 'from config' and filter_geo:
        primary_subset_id = cfg.primary_subset_id
    elif (primary_subset_id is None) or (primary_subset_id is False) or (not filter_geo):
        primary_subset_id = []

    subset_geographies = set(cfg.geo.gau_to_geography[id] for id in primary_subset_id)
    current_geography = util.ensure_iterable_and_not_string(current_geography)
    converted_geography = util.ensure_iterable_and_not_string(converted_geography)
    union_geo = list(subset_geographies | set(current_geography) | set(converted_geography))
    level_to_remove = list(subset_geographies - set(current_geography) - set(converted_geography))
    map_key = cfg.cfgfile.get('case', 'default_geography_map_key') if map_key is None else map_key
    table = geomap_data[map_key].groupby(level=union_geo).sum().to_frame()
    if normalize_as == 'total':
        table = self._normalize(table, current_geography)
    if primary_subset_id:
        # filter the table
        table = table.iloc[self._get_iloc_geo_subset(table, primary_subset_id)]
        table = util.remove_df_levels(table, level_to_remove)
        table = table.reset_index().set_index(table.index.names)
    if normalize_as == 'intensity':
        table = self._normalize(table, converted_geography)
    if reset_index:
        table = table.reset_index()
    if not eliminate_zeros:
        index = pd.MultiIndex.from_product(table.index.levels, names=table.index.names)
        table = table.reorder_levels(index.names)
        table = table.reindex(index, fill_value=0.0)
    return table
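# Illustrative sketch: the _normalize step is assumed to divide each mapping
# weight by its total within the normalization geography so shares sum to 1;
# the weights below are invented.
import pandas as pd

idx = pd.MultiIndex.from_tuples([('new england', 'maine'), ('new england', 'vermont')],
                                names=['census_division', 'state'])
weights = pd.Series([1.3, 0.6], index=idx, name='households')

shares = weights / weights.groupby(level='census_division').transform('sum')
print(shares)  # maine ~0.684, vermont ~0.316; shares sum to 1 per census division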
def set_rio_capacity_factor(self, rio_inputs):
    df = rio_inputs.capacity_factor
    if self.name in set(df.index.get_level_values('technology')) and self.name not in cfg.rio_excluded_technologies:
        df = util.df_slice(df, self.name, 'technology')
        if not np.any([isinstance(x, int) for x in df.index.get_level_values('resource_bin').values]):
            df = util.remove_df_levels(df, 'resource_bin')
        else:
            df = df[df.index.get_level_values('resource_bin') != 'n/a']
            df = df.groupby(level=df.index.names).sum()
        self.raw_values = df
        self._has_data = True
        self.geography = cfg.rio_geography
        self.capacity_or_energy_unit = cfg.rio_energy_unit
        self.time_unit = cfg.rio_time_unit
        self.input_timestep = cfg.rio_timestep_multiplier
        self.interpolation_method = 'linear_interpolation'
        self.extrapolation_method = 'nearest'
def calculate(self, vintages, years):
    self.vintages = vintages
    self.years = years
    if self.data and self.raw_values is not None:
        self.convert_cost()
        self.remap(map_from='values', map_to='values', time_index_name='vintage')
        self.values = util.remove_df_levels(self.values, cfg.removed_demand_levels, agg_function='mean')
        self.levelize_costs()
    if self.data is False:
        self.absolute = False
    if self.raw_values is None:
        # if the class is empty, there is no data for conversion, so the class is considered converted
        self.absolute = True
def __init__(self, id, subsector_id, sql_id_table, sql_data_table, primary_key, data_id_key, reference=False):
    self.id = id
    self.subsector_id = subsector_id
    self.sql_id_table = sql_id_table
    self.sql_data_table = sql_data_table
    self.mapped = False
    if reference:
        for col, att in util.object_att_from_table(self.sql_id_table, self.subsector_id, primary_key):
            if att is not None:
                setattr(self, col, att)
        DataMapFunctions.__init__(self, data_id_key)
        self.read_timeseries_data(subsector_id=self.subsector_id)
        self.raw_values = util.remove_df_levels(self.raw_values, 'technology')
    else:
        self.replaced_demand_tech_id = None
        # measure-specific sales share does not require technology filtering
        Abstract.__init__(self, self.id, primary_key=primary_key, data_id_key=data_id_key)
def __init__(self, id, supply_node_id, sql_id_table, sql_data_table, reference=False):
    self.id = id
    self.supply_node_id = supply_node_id
    self.sql_id_table = sql_id_table
    self.sql_data_table = sql_data_table
    self.mapped = False
    self.input_type = 'intensity'
    if reference:
        for col, att in util.object_att_from_table(self.sql_id_table, self.supply_node_id, 'supply_node_id'):
            if att is not None:
                setattr(self, col, att)
        DataMapFunctions.__init__(self, 'supply_technology')
        self.read_timeseries_data()
        self.raw_values = util.remove_df_levels(self.raw_values, ['supply_node', 'supply_technology'])
    else:
        # measure-specific sales share does not require technology filtering
        Abstract.__init__(self, self.id)
def geomap_to_primary_geography(self, df):
    """Map the dataframe to the primary geography."""
    geography_map_key = self.geography_map_key or GeoMapper.default_geography_map_key
    # here we map to two geographies, the time zone and the model primary geography; if we don't do this
    # it causes a bug whenever disaggregating input data
    self.map_df_primary = GeoMapper.get_instance().map_df(self.geography, ['time zone', self.primary_geography],
                                                          normalize_as=self.input_type, map_key=geography_map_key)
    mapped_data = util.DfOper.mult((df, self.map_df_primary), fill_value=np.nan)
    if self.geography != self.primary_geography and self.geography != 'time zone':
        mapped_data = util.remove_df_levels(mapped_data, self.geography)
    mapped_data = mapped_data.swaplevel('weather_datetime', -1)
    mapped_data = mapped_data.sort_index()
    return mapped_data
def calculate_payback(self):
    cost_unit = cfg.cfgfile.get('case', 'currency_year_id') + " " + cfg.cfgfile.get('case', 'currency_name')
    initial_vintage = min(cfg.supply_years)
    supply_side_df = self.demand.outputs.demand_embodied_energy_costs_payback
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('vintage') >= initial_vintage]
    supply_side_df = supply_side_df[supply_side_df.index.get_level_values('year') >= initial_vintage]
    supply_side_df = supply_side_df.sort_index()
    demand_side_df = self.demand.d_annual_costs_payback
    demand_side_df.columns = ['value']
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('vintage') >= initial_vintage]
    demand_side_df = demand_side_df[demand_side_df.index.get_level_values('year') >= initial_vintage]
    demand_side_df = demand_side_df.reindex(supply_side_df.index).sort_index()
    sales_df = copy.deepcopy(self.demand.outputs.d_sales)
    util.replace_index_name(sales_df, 'vintage', 'year')
    sales_df = sales_df[sales_df.index.get_level_values('vintage') >= initial_vintage]
    sales_df = util.add_and_set_index(sales_df, 'year', cfg.supply_years)
    sales_df.index = sales_df.index.reorder_levels(supply_side_df.index.names)
    sales_df = sales_df.reindex(supply_side_df.index).sort_index()
    keys = ['SUPPLY-SIDE', 'DEMAND-SIDE']
    names = ['COST TYPE']
    self.outputs.c_payback = pd.concat([util.DfOper.divi([supply_side_df, sales_df]),
                                        util.DfOper.divi([demand_side_df, sales_df])], keys=keys, names=names)
    self.outputs.c_payback = self.outputs.c_payback[np.isfinite(self.outputs.c_payback.values)]
    self.outputs.c_payback = self.outputs.c_payback.replace([np.inf, np.nan], 0)
    for sector in self.demand.sectors.values():
        for subsector in sector.subsectors.values():
            if hasattr(subsector, 'stock') and subsector.sub_type != 'link':
                indexer = util.level_specific_indexer(self.outputs.c_payback, 'subsector', subsector.id)
                self.outputs.c_payback.loc[indexer, 'unit'] = subsector.stock.unit.upper()
    self.outputs.c_payback = self.outputs.c_payback.set_index('unit', append=True)
    self.outputs.c_payback.columns = [cost_unit.upper()]
    self.outputs.c_payback['lifetime_year'] = self.outputs.c_payback.index.get_level_values('year') - self.outputs.c_payback.index.get_level_values('vintage') + 1
    self.outputs.c_payback = self.outputs.c_payback.set_index('lifetime_year', append=True)
    self.outputs.c_payback = util.remove_df_levels(self.outputs.c_payback, 'year')
    self.outputs.c_payback = self.outputs.c_payback.groupby(level=[x for x in self.outputs.c_payback.index.names if x != 'lifetime_year']).transform(lambda x: x.cumsum())
    self.outputs.c_payback = self.outputs.c_payback[self.outputs.c_payback[cost_unit.upper()] != 0]
    self.outputs.c_payback = self.outputs.return_cleaned_output('c_payback')
def calculate(self, vintages, years):
    self.vintages = vintages
    self.years = years
    if self._has_data and self.raw_values is not None:
        self.remap(map_from='raw_values', map_to='values', time_index_name='vintage', converted_geography=GeoMapper.demand_primary_geography)
        util.convert_age(self, reverse=True, vintages=self.vintages, years=self.years)
        self.values = util.remove_df_levels(self.values, cfg.removed_demand_levels, agg_function='mean')
    if not self._has_data:
        self.absolute = False
    if self.raw_values is None:
        # if the class is empty, there is no data for conversion, so the class is considered converted
        self.absolute = True
def calculate(self, vintages, years):
    self.vintages = vintages
    self.years = years
    self.remap(time_index_name='vintage', converted_geography=self.primary_geography)
    self.values = util.remove_df_levels(self.values, cfg.removed_demand_levels, agg_function='mean')
def incorporate_foreign_gaus(self, df, current_geography, data_type, map_key, keep_oth_index_over_oth_gau=False, zero_out_negatives=True):
    native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(df, current_geography)
    # we don't have any foreign gaus
    if not foreign_gaus or not cfg.include_foreign_gaus:
        return df, current_geography

    if 'year' in df.index.names:
        y_or_v = 'year'
    elif 'vintage' in df.index.names:
        y_or_v = 'vintage'
    else:
        raise ValueError('df must either have year or vintage to incorporate foreign gaus')

    index_with_nans = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
    # if this is the case, we have two options: (1) get rid of the other index, (2) ignore the foreign gau
    if index_with_nans and (keep_oth_index_over_oth_gau or data_type == 'intensity'):
        return self.filter_foreign_gaus(df, current_geography), current_geography
    else:
        assert (y_or_v not in index_with_nans) and (current_geography not in index_with_nans)
        # we need to eliminate levels with nan before moving on
        df = util.remove_df_levels(df, index_with_nans)

    base_gaus = np.array(self.values.index.get_level_values(current_geography), dtype=int)
    for id in foreign_gaus:
        foreign_geography = self.gau_to_geography[id]
        index = np.nonzero(self.values.index.get_level_values(self.gau_to_geography[id]) == id)[0]
        impacted_gaus = list(set(base_gaus[index]))
        base_gaus[index] = id
        if any(impacted in foreign_gaus for impacted in impacted_gaus):
            raise ValueError('foreign gaus in the database cannot overlap geographically')

        # if the data_type is a total, we need to net out the total from the neighboring gaus
        if data_type == 'total':
            # we first need to do a clean time series, then we need to allocate out and subtract
            allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[id])
            foreign_gau_slice = util.df_slice(df, id, current_geography, drop_level=False, reset_index=True)
            foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
            allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation))
            allocated_foreign_gau_slice = util.remove_df_levels(allocated_foreign_gau_slice, foreign_geography)
            indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
            impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
            impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
            new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice), fill_value=np.nan, non_expandable_levels=[])
            new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
            if new_impacted_gaus.min().min() < 0:
                if not zero_out_negatives:
                    raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
                else:
                    new_impacted_gaus[new_impacted_gaus < 0] = 0
            if new_impacted_gaus.isnull().all().value:
                raise ValueError('Years or vintages did not overlap between the foreign gaus and impacted gaus')
            indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
            df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]

    assert not any([any(np.isnan(row)) for row in df.index.get_values()])
    new_geography_name = self.make_new_geography_name(current_geography, list(foreign_gaus))
    df.index = df.index.rename(new_geography_name, level=current_geography)
    if new_geography_name not in self.geographies:
        self.add_new_geography(new_geography_name, base_gaus)
    return df, new_geography_name
def _update_dataframe_totals_after_foreign_gau(self, df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives):
    y_or_v = GeoMapper._get_df_time_index_name(df)
    # we first need to do a clean time series, then we need to allocate out and subtract
    indexer = util.level_specific_indexer(df, current_geography, [impacted_gaus])
    impacted_gaus_slice = df.loc[indexer, :].reset_index().set_index(df.index.names)
    foreign_gau_slice = util.df_slice(df, foreign_gau, current_geography, drop_level=False, reset_index=True)
    foreign_gau_slice.index = foreign_gau_slice.index.rename(foreign_geography, level=current_geography)
    # do the allocation, take the ratio of foreign to native, do a clean timeseries, then reconstitute the foreign gau data over all years
    allocation = self.map_df(foreign_geography, current_geography, map_key=map_key, primary_subset_id=[foreign_gau])
    allocated_foreign_gau_slice = util.DfOper.mult((foreign_gau_slice, allocation), fill_value=np.nan)
    allocated_foreign_gau_slice = allocated_foreign_gau_slice.reorder_levels([-1] + list(range(df.index.nlevels)))
    ratio_allocated_to_impacted = util.DfOper.divi((allocated_foreign_gau_slice, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    clean_ratio = TimeSeries.clean(data=ratio_allocated_to_impacted, time_index_name=y_or_v, interpolation_method='linear_interpolation', extrapolation_method='nearest')
    allocated_foreign_gau_slice_all_years = util.DfOper.mult((clean_ratio, impacted_gaus_slice), fill_value=np.nan, non_expandable_levels=[])
    allocated_foreign_gau_slice_new_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, foreign_geography)
    allocated_foreign_gau_slice_foreign_geo = util.remove_df_levels(allocated_foreign_gau_slice_all_years, current_geography)
    allocated_foreign_gau_slice_foreign_geo.index = allocated_foreign_gau_slice_foreign_geo.index.rename(current_geography, level=foreign_geography)
    # update foreign GAUs after clean timeseries
    allocated_gau_years = list(allocated_foreign_gau_slice_foreign_geo.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(allocated_foreign_gau_slice_foreign_geo, [current_geography, y_or_v], [foreign_gau, allocated_gau_years])
    df.loc[indexer, :] = allocated_foreign_gau_slice_foreign_geo.loc[indexer, :]
    new_impacted_gaus = util.DfOper.subt((impacted_gaus_slice, allocated_foreign_gau_slice_new_geo), fill_value=np.nan, non_expandable_levels=[])
    new_impacted_gaus = new_impacted_gaus.reorder_levels(df.index.names).sort()
    if new_impacted_gaus.min().min() < 0:
        if not zero_out_negatives:
            raise ValueError('Negative values resulted from subtracting the foreign gau from the base gaus. This is the resulting dataframe: {}'.format(new_impacted_gaus))
        else:
            new_impacted_gaus[new_impacted_gaus < 0] = 0
    if new_impacted_gaus.isnull().all().value:
        raise ValueError('Years or vintages did not overlap between the foreign gaus and impacted gaus')
    # update native GAUs after netting out foreign gaus
    impacted_gau_years = list(impacted_gaus_slice.index.get_level_values(y_or_v).values)
    indexer = util.level_specific_indexer(df, [current_geography, y_or_v], [impacted_gaus, impacted_gau_years])
    df.loc[indexer, :] = new_impacted_gaus.loc[indexer, :]
    return df
def incorporate_foreign_gaus(self, df, current_geography, data_type, map_key, keep_oth_index_over_oth_gau=False, zero_out_negatives=True):
    native_gaus, current_gaus, foreign_gaus = self.get_native_current_foreign_gaus(df, current_geography)
    # we don't have any foreign gaus
    if not foreign_gaus or not cfg.include_foreign_gaus:
        return df, current_geography

    y_or_v = GeoMapper._get_df_time_index_name(df)
    index_with_nans = [df.index.names[i] for i in set(np.nonzero([np.isnan(row) for row in df.index.get_values()])[1])]
    # if we have an index with nan, that typically indicates that one of the foreign gaus didn't have all the index levels
    # if this is the case, we have two options: (1) ignore the foreign gau, (2) get rid of the other index
    if index_with_nans and (keep_oth_index_over_oth_gau or data_type == 'intensity'):
        return self.filter_foreign_gaus(df, current_geography), current_geography
    else:
        assert (y_or_v not in index_with_nans) and (current_geography not in index_with_nans)
        # we need to eliminate levels with nan before moving on
        df = util.remove_df_levels(df, index_with_nans)

    # add missing level indices for foreign gaus; this must be done before we fill in years because we use a fill value of zero
    df = self._add_missing_level_elements_to_foreign_gaus(df, current_geography)

    # we need all the index level combinations to have all years for this to work correctly
    df_no_foreign_gaus = self.filter_foreign_gaus(df, current_geography)
    df_years = sorted(list(set(df_no_foreign_gaus.index.get_level_values(y_or_v).values)))
    df = util.reindex_df_level_with_new_elements(df, y_or_v, df_years)

    base_gaus = np.array(self.values.index.get_level_values(current_geography), dtype=int)
    for foreign_gau in foreign_gaus:
        foreign_geography = self.gau_to_geography[foreign_gau]
        index = np.nonzero(self.values.index.get_level_values(self.gau_to_geography[foreign_gau]) == foreign_gau)[0]
        impacted_gaus = list(set(base_gaus[index]))
        base_gaus[index] = foreign_gau
        if any(impacted in foreign_gaus for impacted in impacted_gaus):
            raise ValueError('foreign gaus in the database cannot overlap geographically')
        # if the data_type is a total, we need to net out the total
        if data_type == 'total':
            df = self._update_dataframe_totals_after_foreign_gau(df, current_geography, foreign_geography, impacted_gaus, foreign_gau, map_key, zero_out_negatives)
        elif data_type == 'intensity':
            logging.warning('Foreign GAUs with intensities is not yet implemented, totals will not be conserved')

    assert not any([any(np.isnan(row)) for row in df.index.get_values()])
    new_geography_name = self.make_new_geography_name(current_geography, list(foreign_gaus))
    df.index = df.index.rename(new_geography_name, level=current_geography)
    if new_geography_name not in self.geographies:
        self.add_new_geography(new_geography_name, base_gaus)
    # df = GeoMapper.reorder_level_names_after_incorporating_foreign_gaus(df, new_geography_name, y_or_v)
    return df, new_geography_name
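# Illustrative sketch: the final step of incorporate_foreign_gaus renames the
# geography index level in place; the plain-pandas form of that rename, with a
# hypothetical new level name, is:
import pandas as pd

idx = pd.MultiIndex.from_tuples([('maine', 2020)], names=['state', 'year'])
df = pd.DataFrame({'value': [1.0]}, index=idx)
df.index = df.index.rename('state_and_foreign', level='state')  # hypothetical combined-geography name
print(df.index.names)  # ['state_and_foreign', 'year']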