Exemplo n.º 1
0
    def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
              time_index=None, fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
              converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0, upper=None):
        """Map the dataframe stored on ``map_from`` to drivers and geography.

        The result is written to the attribute named by ``map_to``; nothing is returned.

        Args:
            map_from (string): name of the attribute holding the starting dataframe (defaults to 'raw_values')
            map_to (string): name of the attribute that receives the result (defaults to 'values')
            drivers (list of or single dataframe): drivers for the remap; when None or empty only
                the timeseries cleaning and the geography mapping are performed
            time_index_name (string): name of the time index; also used to look up
                ``self.<time_index_name>s`` when ``time_index`` is None
            time_index: time index handed to ``clean_timeseries``; when None it is taken from
                ``self.<time_index_name>s`` if that attribute exists, otherwise from the 'years'
                entry of the 'case' section of the config
            fill_timeseries (bool): whether ``clean_timeseries`` is called
            interpolation_method, extrapolation_method: forwarded to ``clean_timeseries``
            converted_geography: target geography (defaults to the configured primary geography)
            current_geography: geography of the starting dataframe (defaults to ``self.geography``)
            current_data_type (string): 'total' is handled specially in the driver branch; any other
                value takes the alternative branch (defaults to ``self.input_type``)
            fill_value: forwarded to ``geo_map`` and to the ``DfOper`` operations
            lower, upper: forwarded to ``clean_timeseries`` in the no-driver branch only

        Raises:
            ValueError: if ``current_geography`` is not an index level of the starting dataframe.
                The check runs before ``map_to`` is written, so a failed call leaves ``self`` untouched.
        """
        if converted_geography is None:
            converted_geography = cfg.cfgfile.get('case', 'primary_geography')
        if current_data_type is None:
            current_data_type = self.input_type
        if current_geography is None:
            current_geography = self.geography
        # TODO fix pluralization
        if time_index is None:
            plural_name = time_index_name + "s"
            time_index = getattr(self, plural_name) if hasattr(self, plural_name) else cfg.cfgfile.get('case', 'years')

        # Validate first so that an invalid call does not clobber the target attribute.
        source_df = getattr(self, map_from)
        index_names = source_df.index.names if source_df.index.nlevels > 1 else [source_df.index.name]
        if current_geography not in index_names:
            raise ValueError('current geography does not match the geography of the dataframe in remap')

        setattr(self, map_to, source_df.copy())

        if (drivers is None) or (not len(drivers)):
            # No drivers: clean the timeseries, then map to the target geography if needed.
            if fill_timeseries:
                self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                      time_index_name=time_index_name,
                                      interpolation_method=interpolation_method,
                                      extrapolation_method=extrapolation_method,
                                      lower=lower, upper=upper)
            if current_geography != converted_geography:
                self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                             current_data_type=current_data_type, fill_value=fill_value)
            return

        total_driver = DfOper.mult(util.put_in_list(drivers))
        if current_geography != converted_geography:
            # While not on primary geography, geography does have some information we would like to preserve
            self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                         current_data_type=current_data_type, fill_value=fill_value)
            current_geography = converted_geography

        is_total = current_data_type == 'total'
        if is_total:
            # Divide by drivers to turn a total to intensity. multindex_operation will aggregate to common levels.
            quotient = DfOper.divi((getattr(self, map_to), total_driver), expandable=(False, True),
                                   collapsible=(False, True), fill_value=fill_value)
            # -np.nan is just NaN, so this replaces +inf and NaN only.
            # NOTE(review): -inf is left untouched; confirm whether -np.inf was intended here.
            setattr(self, map_to, quotient.replace([np.inf, np.nan, -np.nan], 0))

        # Clean the timeseries as an intensity
        if fill_timeseries:
            # time_index was resolved from time_index_name above, so the two are forwarded together.
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # Multiply by the driver again.
        if is_total:
            product = DfOper.mult((getattr(self, map_to), total_driver), fill_value=fill_value)
        else:
            product = DfOper.mult((getattr(self, map_to), total_driver), expandable=(True, False),
                                  collapsible=(False, True), fill_value=fill_value)
        setattr(self, map_to, product)
        self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type)
Exemplo n.º 2
0
    def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
              time_index=None, fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
              converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0, upper=None):
        """Map the dataframe stored on ``map_from`` to drivers and geography.

        The result is written to the attribute named by ``map_to``; nothing is returned.

        Args:
            map_from (string): name of the attribute holding the starting dataframe (defaults to 'raw_values')
            map_to (string): name of the attribute that receives the result (defaults to 'values')
            drivers (list of or single dataframe): drivers for the remap; when None or empty only
                the timeseries cleaning and the geography mapping are performed
            time_index_name (string): name of the time index; also used to look up
                ``self.<time_index_name>s`` when ``time_index`` is None
            time_index: time index handed to ``clean_timeseries``; when None it is taken from
                ``self.<time_index_name>s`` if that attribute exists, otherwise from the 'years'
                entry of the 'case' section of the config
            fill_timeseries (bool): whether ``clean_timeseries`` is called
            interpolation_method, extrapolation_method: forwarded to ``clean_timeseries``
            converted_geography: target geography (defaults to the configured primary geography)
            current_geography: geography of the starting dataframe (defaults to ``self.geography``)
            current_data_type (string): 'total' is handled specially in the driver branch; any other
                value takes the alternative branch (defaults to ``self.input_type``)
            fill_value: forwarded to ``geo_map``
            lower, upper: forwarded to ``clean_timeseries`` in the no-driver branch only

        Raises:
            ValueError: if ``current_geography`` is not an index level of the starting dataframe.
                The check runs before ``map_to`` is written, so a failed call leaves ``self`` untouched.
        """
        if converted_geography is None:
            converted_geography = cfg.cfgfile.get('case', 'primary_geography')
        if current_data_type is None:
            current_data_type = self.input_type
        if current_geography is None:
            current_geography = self.geography
        # TODO fix pluralization
        if time_index is None:
            plural_name = time_index_name + "s"
            time_index = getattr(self, plural_name) if hasattr(self, plural_name) else cfg.cfgfile.get('case', 'years')

        # Validate first so that an invalid call does not clobber the target attribute.
        source_df = getattr(self, map_from)
        has_multiindex = source_df.index.nlevels > 1
        index_names = source_df.index.names if has_multiindex else [source_df.index.name]
        if current_geography not in index_names:
            raise ValueError('current geography does not match the geography of the dataframe in remap')

        setattr(self, map_to, source_df.copy())

        if (drivers is None) or (not len(drivers)):
            # No drivers: clean the timeseries, then map to the target geography if needed.
            if fill_timeseries:
                self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                      time_index_name=time_index_name,
                                      interpolation_method=interpolation_method,
                                      extrapolation_method=extrapolation_method,
                                      lower=lower, upper=upper)
            if current_geography != converted_geography:
                self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                             current_data_type=current_data_type, fill_value=fill_value)
            return

        total_driver = DfOper.mult(util.put_in_list(drivers))

        # Elements of the geography level in the starting dataframe; only needed in this branch.
        if has_multiindex:
            geography_elements = source_df.index.levels[util.position_in_index(source_df, current_geography)]
        else:
            geography_elements = source_df.index.tolist()

        if len(geography_elements) > 1 and current_geography != converted_geography:
            # While not on primary geography, geography does have some information we would like to preserve
            self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                         current_data_type=current_data_type, fill_value=fill_value)
            current_geography = converted_geography

        is_total = current_data_type == 'total'
        if is_total:
            # Divide by drivers to turn a total to intensity. multindex_operation will aggregate to common levels.
            quotient = DfOper.divi((getattr(self, map_to), total_driver), expandable=(False, True),
                                   collapsible=(False, True))
            setattr(self, map_to, quotient)

        # Clean the timeseries as an intensity
        if fill_timeseries:
            # time_index was resolved from time_index_name above, so the two are forwarded together.
            self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                  time_index_name=time_index_name,
                                  interpolation_method=interpolation_method,
                                  extrapolation_method=extrapolation_method)

        # Multiply by the driver again.
        if is_total:
            product = DfOper.mult((getattr(self, map_to), total_driver))
        else:
            product = DfOper.mult((getattr(self, map_to), total_driver), expandable=(True, False),
                                  collapsible=(False, True))
        setattr(self, map_to, product)
        self.ensure_correct_geography(map_to, converted_geography, current_geography, current_data_type)
Exemplo n.º 3
0
    def remap(self, map_from='raw_values', map_to='values', drivers=None, time_index_name='year',
              time_index=None, fill_timeseries=True, interpolation_method='missing', extrapolation_method='missing',
              converted_geography=None, current_geography=None, current_data_type=None, fill_value=0., lower=0, upper=None, filter_geo=True, driver_geography=None):
        """Map the dataframe stored on ``map_from`` to drivers and geography.

        The result is written to the attribute named by ``map_to``; nothing is returned.

        Args:
            map_from (string): name of the attribute holding the starting dataframe (defaults to 'raw_values')
            map_to (string): name of the attribute that receives the result (defaults to 'values')
            drivers (list of or single dataframe): drivers for the remap; when None or empty only
                the timeseries cleaning and the geography mapping are performed
            time_index_name (string): name of the time index
            time_index: resolved through ``self._get_active_time_index(time_index, time_index_name)``
            fill_timeseries (bool): whether ``clean_timeseries`` is called on the mapped data
            interpolation_method, extrapolation_method: forwarded to ``clean_timeseries``
            converted_geography: target geography (defaults to ``cfg.primary_geography``)
            current_geography: geography of the starting dataframe (defaults to ``self.geography``)
            current_data_type (string): 'total' is handled specially in the driver branch; any other
                value takes the alternative branch (defaults to ``self.input_type``)
            fill_value: forwarded to ``geo_map`` and to the ``DfOper`` operations
            lower, upper: forwarded to ``clean_timeseries`` in the no-driver branch only
            filter_geo: forwarded to ``geo_map`` when mapping the data to ``converted_geography``
            driver_geography: geography the drivers are on (defaults to ``cfg.disagg_geography``)

        Raises:
            ValueError: if ``current_geography`` is not an index level of the starting dataframe.
        """
        if driver_geography is None:
            driver_geography = cfg.disagg_geography
        if converted_geography is None:
            converted_geography = cfg.primary_geography
        if current_data_type is None:
            current_data_type = self.input_type
        if current_geography is None:
            current_geography = self.geography
        time_index = self._get_active_time_index(time_index, time_index_name)

        if current_geography not in self._get_df_index_names_in_a_list(getattr(self, map_from)):
            raise ValueError('Current geography does not match the geography of the dataframe in remap')

        # deals with foreign gaus and updates the geography
        df, current_geography = self.account_for_foreign_gaus(map_from, current_data_type, current_geography)
        setattr(self, map_to, df)

        # This happens when we are on a geography level and some of the elements are missing. Such as no PR when we have all the other U.S. States.
        setattr(self, map_to, self._add_missing_geographies(df, current_geography, current_data_type))

        if (drivers is None) or (not len(drivers)):
            # we have no drivers, just need to do a clean timeseries and a geomap
            if fill_timeseries:
                self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index, time_index_name=time_index_name,
                                      interpolation_method=interpolation_method, extrapolation_method=extrapolation_method,
                                      lower=lower, upper=upper)
            if current_geography != converted_geography:
                self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                             current_data_type=current_data_type, fill_value=fill_value, filter_geo=filter_geo)
            return

        # becomes an attribute of self just because we may do a geomap on it
        self.total_driver = DfOper.mult(util.put_in_list(drivers))
        try:
            # turns out we don't always have a year or vintage column for drivers. For instance when linked_demand_technology gets remapped
            if time_index_name in self.total_driver.index.names:
                # sometimes when we have a linked service demand driver in a demand subsector it will come in on a fewer number of years than self.years, making this clean timeseries necessary
                self.clean_timeseries(attr='total_driver', inplace=True, time_index_name=time_index_name,
                                      time_index=time_index, lower=None, upper=None,
                                      interpolation_method='missing', extrapolation_method='missing')

            # While not on primary geography, geography does have some information we would like to preserve.
            # The drivers are mapped as an intensity only when every driver registered on self is an
            # intensity without a base driver and all of them were passed in.
            if (hasattr(self, 'drivers')
                    and len(drivers) == len(self.drivers)
                    and set(x.input_type for x in self.drivers.values()) == set(['intensity'])
                    and set(x.base_driver_id for x in self.drivers.values()) == set([None])):
                driver_mapping_data_type = 'intensity'
            else:
                driver_mapping_data_type = 'total'
            total_driver_current_geo = self.geo_map(current_geography, attr='total_driver', inplace=False,
                                                    current_geography=driver_geography,
                                                    current_data_type=driver_mapping_data_type,
                                                    fill_value=fill_value, filter_geo=False)

            is_total = current_data_type == 'total'
            if is_total:
                quotient = DfOper.divi((getattr(self, map_to), total_driver_current_geo), expandable=(False, True),
                                       collapsible=(False, True), fill_value=fill_value)
                # Test the value rather than identity so that any NaN object (e.g. float('nan')) counts.
                fill_is_nan = isinstance(fill_value, (float, np.floating)) and np.isnan(fill_value)
                if fill_is_nan:
                    # NaN was requested as the fill value, so only +inf is zeroed.
                    invalid_values = [np.inf]
                else:
                    # -np.nan is just NaN, so this covers +inf and NaN only.
                    invalid_values = [np.inf, np.nan, -np.nan]
                setattr(self, map_to, quotient.replace(invalid_values, 0))

            # Clean the timeseries as an intensity
            if fill_timeseries:
                # forward time_index_name together with the time_index that was resolved from it
                self.clean_timeseries(attr=map_to, inplace=True, time_index=time_index,
                                      time_index_name=time_index_name,
                                      interpolation_method=interpolation_method,
                                      extrapolation_method=extrapolation_method)

            # Multiply by the driver again. Errors propagate to the caller instead of
            # dropping into an interactive debugger.
            if is_total:
                product = DfOper.mult((getattr(self, map_to), total_driver_current_geo), fill_value=fill_value)
            else:
                product = DfOper.mult((getattr(self, map_to), total_driver_current_geo), expandable=(True, False),
                                      collapsible=(False, True), fill_value=fill_value)
            setattr(self, map_to, product)

            self.geo_map(converted_geography, attr=map_to, inplace=True, current_geography=current_geography,
                         current_data_type='total', fill_value=fill_value, filter_geo=filter_geo)
        finally:
            # we don't want to keep this around, even when one of the steps above fails
            if hasattr(self, 'total_driver'):
                del self.total_driver
    def remap(self,
              map_from='raw_values',
              map_to='values',
              drivers=None,
              time_index_name='year',
              time_index=None,
              fill_timeseries=True,
              interpolation_method='missing',
              extrapolation_method='missing',
              converted_geography=None,
              current_geography=None,
              current_data_type=None,
              fill_value=0.,
              lower=0,
              upper=None,
              filter_geo=True):
        """Map the dataframe stored on ``map_from`` to drivers and geography.

        The result is written to the attribute named by ``map_to``; nothing is returned.
        When drivers are given, their product is also left on ``self.total_driver``.

        Args:
            map_from (string): name of the attribute holding the starting dataframe (defaults to 'raw_values')
            map_to (string): name of the attribute that receives the result (defaults to 'values')
            drivers (list of or single dataframe): drivers for the remap; when None or empty only
                the timeseries cleaning and the geography mapping are performed
            time_index_name (string): name of the time index
            time_index: resolved through ``self._get_active_time_index(time_index, time_index_name)``
            fill_timeseries (bool): whether ``clean_timeseries`` is called
            interpolation_method, extrapolation_method: forwarded to ``clean_timeseries``
            converted_geography: target geography (defaults to ``cfg.primary_geography``)
            current_geography: geography of the starting dataframe (defaults to ``self.geography``)
            current_data_type (string): 'total' is handled specially in the driver branch; any other
                value takes the alternative branch (defaults to ``self.input_type``)
            fill_value: forwarded to ``geo_map`` and to the ``DfOper`` operations
            lower, upper: forwarded to ``clean_timeseries`` in the no-driver branch only
            filter_geo: forwarded to ``geo_map`` and ``ensure_correct_geography``

        Raises:
            ValueError: if ``current_geography`` is not an index level of the starting dataframe.
        """
        if converted_geography is None:
            converted_geography = cfg.primary_geography
        if current_data_type is None:
            current_data_type = self.input_type
        if current_geography is None:
            current_geography = self.geography
        time_index = self._get_active_time_index(time_index, time_index_name)

        source_index_names = self._get_df_index_names_in_a_list(getattr(self, map_from))
        if current_geography not in source_index_names:
            raise ValueError(
                'Current geography does not match the geography of the dataframe in remap'
            )

        # deals with foreign gaus and updates the geography
        df, current_geography = self.account_for_foreign_gaus(
            map_from, current_data_type, current_geography)
        setattr(self, map_to, df)

        # This happens when we are on a geography level and some of the elements are missing. Such as no PR when we have all the other U.S. States.
        setattr(self, map_to,
                self._add_missing_geographies(df, current_geography, current_data_type))

        if (drivers is None) or (not len(drivers)):
            # we have no drivers, just need to do a clean timeseries and a geomap
            if fill_timeseries:
                self.clean_timeseries(attr=map_to,
                                      inplace=True,
                                      time_index=time_index,
                                      time_index_name=time_index_name,
                                      interpolation_method=interpolation_method,
                                      extrapolation_method=extrapolation_method,
                                      lower=lower,
                                      upper=upper)
            if current_geography != converted_geography:
                self.geo_map(converted_geography,
                             attr=map_to,
                             inplace=True,
                             current_geography=current_geography,
                             current_data_type=current_data_type,
                             fill_value=fill_value,
                             filter_geo=filter_geo)
                current_geography = converted_geography
        else:
            # stored on self because geo_map reads it by attribute name
            self.total_driver = DfOper.mult(util.put_in_list(drivers))

            # Driver used for the total -> intensity division. Kept in a local variable so that a
            # failure further down cannot leave a stale geomapped driver behind for the next call.
            division_driver = self.total_driver
            if current_geography != converted_geography:
                if len(util.put_in_list(drivers)) <= 1:
                    # While not on primary geography, geography does have some information we would like to preserve
                    # we put the driver on the same geography as our data
                    division_driver = self.geo_map(current_geography,
                                                   attr='total_driver',
                                                   inplace=False,
                                                   current_geography=converted_geography,
                                                   current_data_type='total',
                                                   fill_value=fill_value,
                                                   filter_geo=False)
                else:
                    # NOTE(review): unlike the other geo_map calls, filter_geo is not forwarded here
                    self.geo_map(converted_geography,
                                 attr=map_to,
                                 inplace=True,
                                 current_geography=current_geography,
                                 current_data_type=current_data_type,
                                 fill_value=fill_value)
                    current_geography = converted_geography

            is_total = current_data_type == 'total'
            # Divide by drivers to turn a total to intensity. multindex_operation will aggregate to common levels.
            if is_total:
                quotient = DfOper.divi((getattr(self, map_to), division_driver),
                                       expandable=(False, True),
                                       collapsible=(False, True),
                                       fill_value=fill_value)
                # -np.nan is just NaN, so this replaces +inf and NaN only.
                setattr(self, map_to, quotient.replace([np.inf, np.nan, -np.nan], 0))

            # Clean the timeseries as an intensity
            if fill_timeseries:
                # forward time_index_name together with the time_index that was resolved from it
                self.clean_timeseries(attr=map_to,
                                      inplace=True,
                                      time_index=time_index,
                                      time_index_name=time_index_name,
                                      interpolation_method=interpolation_method,
                                      extrapolation_method=extrapolation_method)

            self.geo_map(converted_geography,
                         attr=map_to,
                         inplace=True,
                         current_geography=current_geography,
                         current_data_type='intensity',
                         fill_value=fill_value,
                         filter_geo=filter_geo)
            current_geography = converted_geography

            # Multiply by the (un-geomapped) total driver again.
            if is_total:
                product = DfOper.mult((getattr(self, map_to), self.total_driver),
                                      fill_value=fill_value)
            else:
                product = DfOper.mult((getattr(self, map_to), self.total_driver),
                                      expandable=(True, False),
                                      collapsible=(False, True),
                                      fill_value=fill_value)
            setattr(self, map_to, product)

        self.ensure_correct_geography(map_to,
                                      converted_geography,
                                      current_geography,
                                      current_data_type,
                                      filter_geo=filter_geo)