def process_shape(self, active_dates_index=None, time_slice_elements=None):
    """Build ``self.values`` for this shape over the active dates, then run the
    geography/timezone processing pipeline.

    Parameters:
        active_dates_index: datetime index of every hour being modeled.
            Required; a ``ValueError`` is raised when ``None``.
        time_slice_elements: optional precomputed time-slice elements; derived
            from ``active_dates_index`` via ``Shapes.create_time_slice_elements``
            when omitted.

    Raises:
        ValueError: when ``active_dates_index`` is ``None``, or when the raw
            data does not give full coverage of the active dates.
    """
    # Validate first: the previous version called len(active_dates_index)
    # before this check, so a missing index raised TypeError instead of the
    # intended ValueError.
    if active_dates_index is None:
        raise ValueError('processing a shape requires an active date index')
    self.active_dates_index = active_dates_index
    # 8766 ~= 365.25 * 24, the average number of hours in a calendar year.
    self.num_active_years = len(active_dates_index) / 8766.
    self.time_slice_elements = Shapes.create_time_slice_elements(active_dates_index) if time_slice_elements is None else time_slice_elements
    if self.shape_type == 'weather date':
        # this step is slow, consider replacing
        self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', active_dates_index)
        if self.values.isnull().values.any():
            raise ValueError('Weather data did not give full coverage of the active dates')
    elif self.shape_type == 'time slice':
        self.values = self.create_empty_shape_data()
        non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
        time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]
        for ind, value in self.raw_values.iterrows():
            non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
            time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
            # Skip raw rows whose index elements are not present in the empty
            # shape data (e.g. elements filtered out of the active levels).
            if not np.all([s in l for s, l in zip(non_time_portion + time_portion, non_time_elements_in_levels + time_elements_in_levels)]):
                continue
            indexer = tuple(non_time_portion + time_portion + [slice(None)])
            if self.shape_unit_type == 'energy':
                # Spread the energy total evenly over every hour the slice covers.
                len_slice = len(self.values.loc[indexer])
                self.values.loc[indexer] = value[0] / float(len_slice) * self.num_active_years
            elif self.shape_unit_type == 'power':
                self.values.loc[indexer] = value[0]
        if self.values.isnull().values.any():
            raise ValueError('Shape time slice data did not give full coverage of the active dates')
        # reindex to remove the helper columns
        self.values.index = self.values.index.droplevel(self._active_time_keys)
    self.values = self.values.swaplevel('weather_datetime', -1).sort_index()
    self.geomap_to_time_zone()
    self.localize_shapes()
    self.standardize_time_across_timezones()
    self.geomap_to_primary_geography()
    self.sum_over_time_zone()
    self.normalize()
    self.add_timeshift_type()
def _add_missing_geographies(self, df, current_geography, current_data_type): current_number_of_geographies = len(util.get_elements_from_level(df, current_geography)) propper_number_of_geographies = len(cfg.geo.geographies_unfiltered[current_geography]) if current_data_type == 'total' and current_number_of_geographies != propper_number_of_geographies: # we only want to do it when we have a total, otherwise we can't just fill with zero df = util.reindex_df_level_with_new_elements(df, current_geography, cfg.geo.geographies_unfiltered[current_geography], fill_value=np.nan) return df
def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None): percent_flexible = 0 if percent_flexible is None else percent_flexible hr_delay = 0 if hr_delay is None else hr_delay hr_advance = 0 if hr_advance is None else hr_advance if percent_flexible==0 or (hr_delay==0 and hr_advance==0): return util.df_slice(shape_df, elements=2, levels='timeshift_type') timeshift_levels = list(util.get_elements_from_level(shape_df, 'timeshift_type')) timeshift_levels.sort() if timeshift_levels==[1, 2, 3]: delay = util.df_slice(shape_df, elements=1, levels='timeshift_type') native = util.df_slice(shape_df, elements=2, levels='timeshift_type') advance = util.df_slice(shape_df, elements=3, levels='timeshift_type') elif timeshift_levels==[2]: # TODO this could be a lambda function def shift(df, hr): """ positive hours is a shift forward, negative hours a shift back""" return df.shift(hr).bfill().ffill() non_weather = [n for n in shape_df.index.names if n!='weather_datetime'] delay = shape_df.groupby(level=non_weather).apply(shift, hr=hr_delay) native = shape_df advance = shape_df.groupby(level=non_weather).apply(shift, hr=-hr_advance) else: raise ValueError("elements in the level timeshift_type are not recognized") return pd.concat([delay*percent_flexible + native*(1-percent_flexible), native, advance*percent_flexible + native*(1-percent_flexible)], keys=[1,2,3], names=['timeshift_type'])
def process_shape(self):
    """Build ``self.values`` for this shape from ``self.raw_values``, then run
    the geography/timezone processing pipeline and release the raw data.

    Raises:
        ValueError: when the raw data does not give full coverage of the
            active dates (time-slice shapes).
    """
    logging.info(' shape: ' + self.name)
    self.num_active_years = num_active_years(self.active_dates_index)
    if self.shape_type == 'weather date':
        # this step is slow, consider replacing
        self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', self.active_dates_index)
        self.values = self.values.replace(np.nan, 0)
        # NOTE(review): the replace() above fills every coverage gap with zero,
        # so this check can no longer fire; retained pending confirmation of
        # which behavior (zero-fill vs. fail) is intended.
        if self.values.isnull().values.any():
            raise ValueError('Weather data for shape {} did not give full coverage of the active dates'.format(self.name))
    elif self.shape_type == 'time slice':
        self.values = self.create_empty_shape_data()
        non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
        time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]
        for ind, value in self.raw_values.iterrows():
            non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
            time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
            # Skip raw rows whose index elements are not present in the empty
            # shape data (e.g. elements filtered out of the active levels).
            if not np.all([s in l for s, l in zip(non_time_portion + time_portion, non_time_elements_in_levels + time_elements_in_levels)]):
                continue
            indexer = tuple(non_time_portion + time_portion + [slice(None)])
            if self.shape_unit_type == 'energy':
                # Spread the energy total evenly over every hour the slice covers.
                len_slice = len(self.values.loc[indexer])
                self.values.loc[indexer] = value[0] / float(len_slice) * self.num_active_years
            elif self.shape_unit_type == 'power':
                self.values.loc[indexer] = value[0]
        if self.values.isnull().values.any():
            raise ValueError('Shape time slice data did not give full coverage of the active dates')
        # reindex to remove the helper columns
        self.values.index = self.values.index.droplevel(self._active_time_keys)
    # BUG FIX: DataFrame.sort() was removed from pandas (deprecated 0.17,
    # removed 0.20); sort_index() is the supported spelling and matches the
    # other process_shape implementations in this module.
    self.values = cfg.geo.filter_extra_geos_from_df(self.values.swaplevel('weather_datetime', -1).sort_index())
    self.geomap_to_time_zone()
    self.localize_shapes()
    self.standardize_time_across_timezones()
    self.geomap_to_primary_geography()
    self.sum_over_time_zone()
    self.normalize()
    self.add_timeshift_type()
    # raw values can be very large, so we delete it in this one case
    del self.raw_values
def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None): percent_flexible = 0 if percent_flexible is None else percent_flexible hr_delay = 0 if hr_delay is None else hr_delay hr_advance = 0 if hr_advance is None else hr_advance if percent_flexible == 0 or (hr_delay == 0 and hr_advance == 0): return util.df_slice(shape_df, elements=2, levels='timeshift_type') timeshift_levels = list( util.get_elements_from_level(shape_df, 'timeshift_type')) timeshift_levels.sort() if timeshift_levels == [1, 2, 3]: delay = util.df_slice(shape_df, elements=1, levels='timeshift_type') native = util.df_slice(shape_df, elements=2, levels='timeshift_type') advance = util.df_slice(shape_df, elements=3, levels='timeshift_type') elif timeshift_levels == [2]: # TODO this could be a lambda function def shift(df, hr): """ positive hours is a shift forward, negative hours a shift back""" return df.shift(hr).bfill().ffill() non_weather = [ n for n in shape_df.index.names if n != 'weather_datetime' ] delay = shape_df.groupby(level=non_weather).apply(shift, hr=hr_delay) native = shape_df advance = shape_df.groupby(level=non_weather).apply(shift, hr=-hr_advance) else: raise ValueError( "elements in the level timeshift_type are not recognized") return pd.concat([ delay * percent_flexible + native * (1 - percent_flexible), native, advance * percent_flexible + native * (1 - percent_flexible) ], keys=[1, 2, 3], names=['timeshift_type'])
def produce_flexible_load(shape_df, percent_flexible=None, hr_delay=None, hr_advance=None): hr_delay = 0 if hr_delay is None else hr_delay hr_advance = 0 if hr_advance is None else hr_advance native_slice = shape_df.xs(2, level='timeshift_type') native_slice_stacked = pd.concat([native_slice]*3, keys=[1,2,3], names=['timeshift_type']) pflex_stacked = pd.concat([percent_flexible]*3, keys=[1,2,3], names=['timeshift_type']) timeshift_levels = sorted(list(util.get_elements_from_level(shape_df, 'timeshift_type'))) if timeshift_levels==[1, 2, 3]: # here, we have flexible load profiles already specified by the user names = shape_df.index.names full_load = shape_df.squeeze().unstack('timeshift_type') group_by_names = [n for n in full_load.index.names if n != 'weather_datetime'] full_load = full_load.groupby(level=group_by_names).apply(Shape.ensure_feasible_flexible_load) full_load = full_load.stack('timeshift_type').reorder_levels(names).sort_index().to_frame() full_load.columns = ['value'] elif timeshift_levels==[2]: non_weather = [n for n in native_slice.index.names if n!='weather_datetime'] # positive hours is a shift forward, negative hours a shift back shift = lambda df, hr: df.shift(hr).ffill().fillna(value=0) delay_load = native_slice.groupby(level=non_weather).apply(shift, hr=hr_delay) def advance_load_function(df, hr): df_adv = df.shift(-hr).ffill().fillna(value=0) df_adv.iloc[0] += df.iloc[:hr].sum().sum() return df_adv advance_load = native_slice.groupby(level=non_weather).apply(advance_load_function, hr=hr_advance) full_load = pd.concat([delay_load, native_slice, advance_load], keys=[1,2,3], names=['timeshift_type']) else: raise ValueError("elements in the level timeshift_type are not recognized") return util.DfOper.add((util.DfOper.mult((full_load, pflex_stacked), collapsible=False), util.DfOper.mult((native_slice_stacked, 1-pflex_stacked), collapsible=False)))
def process_shape(self, active_dates_index=None, time_slice_elements=None):
    """Build ``self.values`` for this shape over the active dates, then run the
    geography/timezone processing pipeline.

    Parameters:
        active_dates_index: datetime index of every hour being modeled.
            Required; a ``ValueError`` is raised when ``None``.
        time_slice_elements: optional precomputed time-slice elements; derived
            from ``active_dates_index`` when omitted.

    Raises:
        ValueError: when ``active_dates_index`` is ``None``, or when the raw
            data does not give full coverage of the active dates.
    """
    # Validate before use: the previous version evaluated
    # len(active_dates_index) first, which raised TypeError (not the intended
    # ValueError) when no index was supplied.
    if active_dates_index is None:
        raise ValueError('processing a shape requires an active date index')
    self.active_dates_index = active_dates_index
    # 8766 ~= 365.25 * 24, the average number of hours in a calendar year.
    self.num_active_years = len(active_dates_index) / 8766.
    self.time_slice_elements = Shapes.create_time_slice_elements(active_dates_index) if time_slice_elements is None else time_slice_elements
    if self.shape_type == 'weather date':
        # this step is slow, consider replacing
        self.values = util.reindex_df_level_with_new_elements(self.raw_values, 'weather_datetime', active_dates_index)
        if self.values.isnull().values.any():
            raise ValueError('Weather data did not give full coverage of the active dates')
    elif self.shape_type == 'time slice':
        self.values = self.create_empty_shape_data()
        non_time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._non_time_keys]
        time_elements_in_levels = [list(util.get_elements_from_level(self.values, e)) for e in self._active_time_keys]
        for ind, value in self.raw_values.iterrows():
            non_time_portion = [ind[self._non_time_dict[e]] for e in self._non_time_keys]
            time_portion = [ind[self._active_time_dict[e]] for e in self._active_time_keys]
            # Skip raw rows whose index elements are not present in the empty
            # shape data (e.g. elements filtered out of the active levels).
            if not np.all([s in l for s, l in zip(non_time_portion + time_portion, non_time_elements_in_levels + time_elements_in_levels)]):
                continue
            indexer = tuple(non_time_portion + time_portion + [slice(None)])
            if self.shape_unit_type == 'energy':
                # Spread the energy total evenly over every hour the slice covers.
                len_slice = len(self.values.loc[indexer])
                self.values.loc[indexer] = value[0] / float(len_slice) * self.num_active_years
            elif self.shape_unit_type == 'power':
                self.values.loc[indexer] = value[0]
        if self.values.isnull().values.any():
            raise ValueError('Shape time slice data did not give full coverage of the active dates')
        # reindex to remove the helper columns
        self.values.index = self.values.index.droplevel(self._active_time_keys)
    self.values = self.values.swaplevel('weather_datetime', -1).sort_index()
    self.geomap_to_time_zone()
    self.localize_shapes()
    self.standardize_time_across_timezones()
    self.geomap_to_primary_geography()
    self.sum_over_time_zone()
    self.normalize()
    self.add_timeshift_type()