def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
          time_index=None, fill_timeseries=True, interpolation_method='missing',
          extrapolation_method='missing', converted_geography=None, current_geography=None,
          current_data_type=None, fill_value=0., lower=0, upper=None):
    """
    Copy ``map_from`` into ``map_to``, then map it to drivers and geography.

    Args:
        map_from (string): name of the attribute to read (defaults to 'raw_values')
        map_to (string): name of the attribute to write (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; when None
            or empty the data is only cleaned and geomapped
        time_index_name (string): with an "s" appended, the attribute looked up
            on self for a default time index; forwarded to clean_timeseries
            when there are no drivers
        time_index: forwarded to clean_timeseries; defaults to the pluralized
            attribute on self, else the configured ('case', 'years')
        fill_timeseries (bool): whether clean_timeseries is called
        interpolation_method, extrapolation_method: forwarded to clean_timeseries
        converted_geography: target geography (defaults to the configured
            ('case', 'primary_geography'))
        current_geography: geography of the data (defaults to self.geography)
        current_data_type (string): compared against 'total' to decide whether
            the data is divided by the drivers first (defaults to self.input_type)
        fill_value: forwarded to geo_map and to the DfOper operations
        lower, upper: forwarded to clean_timeseries when there are no drivers

    Raises:
        ValueError: if current_geography is not an index name of the data
    """
    if converted_geography is None:
        converted_geography = cfg.cfgfile.get('case', 'primary_geography')
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography

    # TODO fix pluralization
    if time_index is None:
        plural_name = time_index_name + "s"
        if hasattr(self, plural_name):
            time_index = getattr(self, plural_name)
        else:
            time_index = cfg.cfgfile.get('case', 'years')

    # The copy is stored before validation, so map_to is set even when the
    # geography check below raises.
    setattr(self, map_to, getattr(self, map_from).copy())

    source_index = getattr(self, map_from).index
    if source_index.nlevels > 1:
        index_names = source_index.names
    else:
        index_names = [source_index.name]
    if current_geography not in index_names:
        raise ValueError('current geography does not match the geography of the dataframe in remap')

    no_drivers = drivers is None or len(drivers) == 0
    if no_drivers:
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method,
                                  lower=lower, upper=upper)
        if current_geography != converted_geography:
            geo_map_kwargs = dict(attr=map_to, inplace=True,
                                  current_geography=current_geography,
                                  current_data_type=current_data_type,
                                  fill_value=fill_value)
            self.geo_map(converted_geography, **geo_map_kwargs)
            current_geography = converted_geography
    else:
        total_driver = DfOper.mult(util.put_in_list(drivers))

        if current_geography != converted_geography:
            # While not on primary geography, geography does have some
            # information we would like to preserve
            geo_map_kwargs = dict(attr=map_to, inplace=True,
                                  current_geography=current_geography,
                                  current_data_type=current_data_type,
                                  fill_value=fill_value)
            self.geo_map(converted_geography, **geo_map_kwargs)
            current_geography = converted_geography

        is_total = current_data_type == 'total'
        if is_total:
            # Divide by drivers to turn a total to intensity.
            # multindex_operation will aggregate to common levels.
            quotient = DfOper.divi((getattr(self, map_to), total_driver),
                                   expandable=(False, True), collapsible=(False, True),
                                   fill_value=fill_value)
            setattr(self, map_to, quotient.replace([np.inf, np.nan, -np.nan], 0))

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # Multiply by the drivers (back to a total when we divided above).
        operands = (getattr(self, map_to), total_driver)
        if is_total:
            product = DfOper.mult(operands, fill_value=fill_value)
        else:
            product = DfOper.mult(operands, expandable=(True, False),
                                  collapsible=(False, True), fill_value=fill_value)
        setattr(self, map_to, product)

    self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type)
def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
          time_index=None, fill_timeseries=True, interpolation_method='missing',
          extrapolation_method='missing', converted_geography=None, current_geography=None,
          current_data_type=None, fill_value=0., lower=0, upper=None):
    """
    Copy ``map_from`` into ``map_to``, then map it to drivers and geography.

    Args:
        map_from (string): name of the attribute to read (defaults to 'raw_values')
        map_to (string): name of the attribute to write (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; when None
            or empty the data is only cleaned and geomapped
        time_index_name (string): with an "s" appended, the attribute looked up
            on self for a default time index; forwarded to clean_timeseries
            when there are no drivers
        time_index: forwarded to clean_timeseries; defaults to the pluralized
            attribute on self, else the configured ('case', 'years')
        fill_timeseries (bool): whether clean_timeseries is called
        interpolation_method, extrapolation_method: forwarded to clean_timeseries
        converted_geography: target geography (defaults to the configured
            ('case', 'primary_geography'))
        current_geography: geography of the data (defaults to self.geography)
        current_data_type (string): compared against 'total' to decide whether
            the data is divided by the drivers first (defaults to self.input_type)
        fill_value: forwarded to geo_map
        lower, upper: forwarded to clean_timeseries when there are no drivers

    Raises:
        ValueError: if current_geography is not an index name of the data
    """
    if converted_geography is None:
        converted_geography = cfg.cfgfile.get('case', 'primary_geography')
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography

    # TODO fix pluralization
    if time_index is None:
        plural_name = time_index_name + "s"
        if hasattr(self, plural_name):
            time_index = getattr(self, plural_name)
        else:
            time_index = cfg.cfgfile.get('case', 'years')

    # The copy is stored before validation, so map_to is set even when the
    # geography check below raises.
    setattr(self, map_to, getattr(self, map_from).copy())

    source = getattr(self, map_from)
    if source.index.nlevels > 1:
        index_names = source.index.names
    else:
        index_names = [source.index.name]
    if current_geography not in index_names:
        raise ValueError('current geography does not match the geography of the dataframe in remap')

    # Elements available on the geography level; only consulted when drivers
    # are supplied, but computed up front.
    if source.index.nlevels > 1:
        geography_elements = source.index.levels[util.position_in_index(source, current_geography)]
    else:
        geography_elements = source.index.tolist()

    no_drivers = drivers is None or len(drivers) == 0
    if no_drivers:
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method,
                                  lower=lower, upper=upper)
        if current_geography != converted_geography:
            geo_map_kwargs = dict(attr=map_to, inplace=True,
                                  current_geography=current_geography,
                                  current_data_type=current_data_type,
                                  fill_value=fill_value)
            self.geo_map(converted_geography, **geo_map_kwargs)
            current_geography = converted_geography
    else:
        total_driver = DfOper.mult(util.put_in_list(drivers))

        # The data is geomapped here only when it has more than one geography
        # element and is not already on the target geography.
        if len(geography_elements) > 1 and current_geography != converted_geography:
            # While not on primary geography, geography does have some
            # information we would like to preserve
            geo_map_kwargs = dict(attr=map_to, inplace=True,
                                  current_geography=current_geography,
                                  current_data_type=current_data_type,
                                  fill_value=fill_value)
            self.geo_map(converted_geography, **geo_map_kwargs)
            current_geography = converted_geography

        is_total = current_data_type == 'total'
        if is_total:
            # Divide by drivers to turn a total to intensity.
            # multindex_operation will aggregate to common levels.
            quotient = DfOper.divi((getattr(self, map_to), total_driver),
                                   expandable=(False, True), collapsible=(False, True))
            setattr(self, map_to, quotient)

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # Multiply by the drivers (back to a total when we divided above).
        operands = (getattr(self, map_to), total_driver)
        if is_total:
            product = DfOper.mult(operands)
        else:
            product = DfOper.mult(operands, expandable=(True, False), collapsible=(False, True))
        setattr(self, map_to, product)

    self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type)
def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
          time_index=None, fill_timeseries=True, interpolation_method='missing',
          extrapolation_method='missing', converted_geography=None, current_geography=None,
          current_data_type=None, fill_value=0., lower=0, upper=None, filter_geo=True,
          driver_geography=None):
    """
    Map data to drivers and geography, writing the result to ``map_to``.

    Args:
        map_from (string): starting variable name (defaults to 'raw_values')
        map_to (string): ending variable name (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; when None
            or empty the data is only cleaned and geomapped
        time_index_name (string): name of the time level; passed to
            _get_active_time_index and clean_timeseries
        time_index: passed through _get_active_time_index, then to clean_timeseries
        fill_timeseries (bool): whether clean_timeseries is called on the data
        interpolation_method, extrapolation_method: forwarded to clean_timeseries
        converted_geography: target geography (defaults to cfg.primary_geography)
        current_geography: geography of the data (defaults to self.geography);
            replaced by the geography returned from account_for_foreign_gaus
        current_data_type (string): compared against 'total' to decide whether
            the data is divided by the drivers first (defaults to self.input_type)
        fill_value: forwarded to geo_map and to the DfOper operations
        lower, upper: forwarded to clean_timeseries when there are no drivers
        filter_geo: forwarded to the geo_map calls on the data
        driver_geography: geography the drivers are on (defaults to
            cfg.disagg_geography)

    Raises:
        ValueError: if current_geography is not among the index names reported
            by _get_df_index_names_in_a_list for the ``map_from`` data
    """
    driver_geography = cfg.disagg_geography if driver_geography is None else driver_geography
    converted_geography = cfg.primary_geography if converted_geography is None else converted_geography
    current_data_type = self.input_type if current_data_type is None else current_data_type
    current_geography = self.geography if current_geography is None else current_geography
    time_index = self._get_active_time_index(time_index, time_index_name)

    if current_geography not in self._get_df_index_names_in_a_list(getattr(self, map_from)):
        raise ValueError('Current geography does not match the geography of the dataframe in remap')

    # deals with foreign gaus and updates the geography
    df, current_geography = self.account_for_foreign_gaus(map_from, current_data_type, current_geography)
    setattr(self, map_to, df)

    # This happens when we are on a geography level and some of the elements are missing.
    # Such as no PR when we have all the other U.S. States.
    setattr(self, map_to, self._add_missing_geographies(df, current_geography, current_data_type))

    if (drivers is None) or (not len(drivers)):
        # we have no drivers, just need to do a clean timeseries and a geomap
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method,
                                  lower=lower, upper=upper)
        if current_geography != converted_geography:
            self.geo_map(converted_geography, attr=map_to, inplace=True,
                         current_geography=current_geography,
                         current_data_type=current_data_type,
                         fill_value=fill_value, filter_geo=filter_geo)
            current_geography = converted_geography
        return

    # becomes an attribute of self just because we may do a geomap on it
    self.total_driver = DfOper.mult(util.put_in_list(drivers))
    try:
        # turns out we don't always have a year or vintage column for drivers.
        # For instance when linked_demand_technology gets remapped
        if time_index_name in self.total_driver.index.names:
            # sometimes when we have a linked service demand driver in a demand subsector
            # it will come in on a fewer number of years than self.years, making this
            # clean timeseries necessary
            self.clean_timeseries(attr='total_driver', inplace=True,
                                  time_index_name=time_index_name, time_index=time_index,
                                  lower=None, upper=None,
                                  interpolation_method='missing',
                                  extrapolation_method='missing')

        # While not on primary geography, geography does have some information
        # we would like to preserve. The drivers are geomapped as an intensity
        # only when every driver on self was passed in, all are intensities and
        # none has a base driver.
        if (hasattr(self, 'drivers') and len(drivers) == len(self.drivers)
                and {x.input_type for x in self.drivers.values()} == {'intensity'}
                and {x.base_driver_id for x in self.drivers.values()} == {None}):
            driver_mapping_data_type = 'intensity'
        else:
            driver_mapping_data_type = 'total'
        total_driver_current_geo = self.geo_map(current_geography, attr='total_driver',
                                                inplace=False,
                                                current_geography=driver_geography,
                                                current_data_type=driver_mapping_data_type,
                                                fill_value=fill_value, filter_geo=False)

        if current_data_type == 'total':
            # Divide by drivers to turn a total into an intensity.
            df_intensity = DfOper.divi((getattr(self, map_to), total_driver_current_geo),
                                       expandable=(False, True), collapsible=(False, True),
                                       fill_value=fill_value)
            # NOTE(review): identity check, so only the np.nan object itself
            # keeps NaNs in the result; any other fill_value zeroes them.
            if fill_value is np.nan:
                df_intensity = df_intensity.replace([np.inf], 0)
            else:
                df_intensity = df_intensity.replace([np.inf, np.nan, -np.nan], 0)
            setattr(self, map_to, df_intensity)

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # Multiply by the drivers. Failures propagate to the caller instead of
        # being swallowed by a debugger breakpoint.
        if current_data_type == 'total':
            setattr(self, map_to,
                    DfOper.mult((getattr(self, map_to), total_driver_current_geo),
                                fill_value=fill_value))
        else:
            setattr(self, map_to,
                    DfOper.mult((getattr(self, map_to), total_driver_current_geo),
                                expandable=(True, False), collapsible=(False, True),
                                fill_value=fill_value))

        self.geo_map(converted_geography, attr=map_to, inplace=True,
                     current_geography=current_geography, current_data_type='total',
                     fill_value=fill_value, filter_geo=filter_geo)
    finally:
        # we don't want to keep this around, even when a step above raised
        del self.total_driver
def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
          time_index=None, fill_timeseries=True, interpolation_method='missing',
          extrapolation_method='missing', converted_geography=None, current_geography=None,
          current_data_type=None, fill_value=0., lower=0, upper=None, filter_geo=True):
    """
    Map data to drivers and geography, writing the result to ``map_to``.

    Args:
        map_from (string): name of the attribute to read (defaults to 'raw_values')
        map_to (string): name of the attribute to write (defaults to 'values')
        drivers (list of or single dataframe): drivers for the remap; when None
            or empty the data is only cleaned and geomapped
        time_index_name (string): passed to _get_active_time_index, and to
            clean_timeseries when there are no drivers
        time_index: passed through _get_active_time_index, then to clean_timeseries
        fill_timeseries (bool): whether clean_timeseries is called
        interpolation_method, extrapolation_method: forwarded to clean_timeseries
        converted_geography: target geography (defaults to cfg.primary_geography)
        current_geography: geography of the data (defaults to self.geography);
            replaced by the geography returned from account_for_foreign_gaus
        current_data_type (string): compared against 'total' to decide whether
            the data is divided by the drivers first (defaults to self.input_type)
        fill_value: forwarded to geo_map and to the DfOper operations
        lower, upper: forwarded to clean_timeseries when there are no drivers
        filter_geo: forwarded to geo_map and ensure_correct_geography

    Raises:
        ValueError: if current_geography is not among the index names reported
            by _get_df_index_names_in_a_list for the ``map_from`` data
    """
    if converted_geography is None:
        converted_geography = cfg.primary_geography
    if current_data_type is None:
        current_data_type = self.input_type
    if current_geography is None:
        current_geography = self.geography
    time_index = self._get_active_time_index(time_index, time_index_name)

    known_levels = self._get_df_index_names_in_a_list(getattr(self, map_from))
    if current_geography not in known_levels:
        raise ValueError('Current geography does not match the geography of the dataframe in remap')

    # deals with foreign gaus and updates the geography
    prepared, current_geography = self.account_for_foreign_gaus(
        map_from, current_data_type, current_geography)
    setattr(self, map_to, prepared)

    # This happens when we are on a geography level and some of the elements
    # are missing. Such as no PR when we have all the other U.S. States.
    completed = self._add_missing_geographies(prepared, current_geography, current_data_type)
    setattr(self, map_to, completed)

    no_drivers = drivers is None or len(drivers) == 0
    if no_drivers:
        # we have no drivers, just need to do a clean timeseries and a geomap
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method,
                                  lower=lower, upper=upper)
        if current_geography != converted_geography:
            self.geo_map(converted_geography, attr=map_to, inplace=True,
                         current_geography=current_geography,
                         current_data_type=current_data_type,
                         fill_value=fill_value, filter_geo=filter_geo)
            current_geography = converted_geography
    else:
        self.total_driver = DfOper.mult(util.put_in_list(drivers))

        if current_geography != converted_geography:
            if len(util.put_in_list(drivers)) <= 1:
                # While not on primary geography, geography does have some
                # information we would like to preserve
                # we put the driver on the same geography as our data
                self.geomapped_total_driver = self.geo_map(
                    current_geography, attr='total_driver', inplace=False,
                    current_geography=converted_geography, current_data_type='total',
                    fill_value=fill_value, filter_geo=False)
            else:
                self.geo_map(converted_geography, attr=map_to, inplace=True,
                             current_geography=current_geography,
                             current_data_type=current_data_type,
                             fill_value=fill_value)
                current_geography = converted_geography

        is_total = current_data_type == 'total'

        # Divide by drivers to turn a total to intensity.
        # multindex_operation will aggregate to common levels.
        if is_total:
            if hasattr(self, 'geomapped_total_driver'):
                divisor = self.geomapped_total_driver
            else:
                divisor = self.total_driver
            quotient = DfOper.divi((getattr(self, map_to), divisor),
                                   expandable=(False, True), collapsible=(False, True),
                                   fill_value=fill_value)
            setattr(self, map_to, quotient.replace([np.inf, np.nan, -np.nan], 0))

        # Clean the timeseries as an intensity
        if fill_timeseries:
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # NOTE(review): the original's indentation was lost; this geomap is
        # reconstructed as unconditional within the driver branch - confirm.
        self.geo_map(converted_geography, attr=map_to, inplace=True,
                     current_geography=current_geography, current_data_type='intensity',
                     fill_value=fill_value, filter_geo=filter_geo)
        current_geography = converted_geography

        if hasattr(self, 'geomapped_total_driver'):
            delattr(self, 'geomapped_total_driver')

        # Multiply by the drivers (back to a total when we divided above).
        operands = (getattr(self, map_to), self.total_driver)
        if is_total:
            product = DfOper.mult(operands, fill_value=fill_value)
        else:
            product = DfOper.mult(operands, expandable=(True, False),
                                  collapsible=(False, True), fill_value=fill_value)
        setattr(self, map_to, product)

    self.ensure_correct_geography(map_to, converted_geography, current_geography,
                                  current_data_type, filter_geo=filter_geo)