def load_usr_demand_profiles(settings):
    """Temp function to load user-generated demand profiles.

    Parameters
    ----------
    settings : dict
        User-defined parameters from a settings file. Must contain the keys
        'input_folder' (path-like) and 'regional_load_fn' (str).

    Returns
    -------
    DataFrame
        Hourly demand profiles from the user file, trimmed to 8760 hours if
        the source data includes a leap day.
    """
    from powergenome.external_data import make_usr_demand_profiles

    lp_path = settings["input_folder"] / settings["regional_load_fn"]
    hourly_load_profiles = make_usr_demand_profiles(lp_path, settings)

    # BUG FIX: remove_feb_29 returns the trimmed frame (make_load_curves
    # assigns and then iterates its result), so the return value must be
    # captured — the original discarded it, leaving leap-day hours in place.
    if len(hourly_load_profiles) == 8784:
        hourly_load_profiles = remove_feb_29(hourly_load_profiles)

    return hourly_load_profiles
def make_demand_response_profiles(path, resource_name, settings):
    """Read files with DR profiles across years and scenarios. Return the hourly
    load profiles for a single resource in the model year.

    Parameters
    ----------
    path : path-like
        Where to load the file from. The CSV is expected to have a 4-row
        header: resource name, year, scenario, region.
    resource_name : str
        Name of the demand response resource
    settings : dict
        User-defined parameters from a settings file. Uses the keys
        'model_year' and 'demand_response'.

    Returns
    -------
    DataFrame
        8760 hourly profiles of DR load for each region where the resource is
        available. Column names are the regions.
    """
    year = settings["model_year"]
    scenario = settings["demand_response"]

    df = pd.read_csv(path, header=[0, 1, 2, 3])

    # Use the MultiIndex columns to just get columns with the correct resource listed
    # in the top row of the csv. The resource name is dropped from the columns.
    resource_df = df.loc[:, resource_name]

    # Year header values are strings in the CSV; compare as int, select as str.
    assert year in set(
        resource_df.columns.get_level_values(0).astype(int)
    ), f"The model year is not in the years of data for DR resource {resource_name}"
    resource_df = resource_df.loc[:, str(year)]

    assert scenario in set(
        resource_df.columns.get_level_values(0)
    ), f"The scenario {scenario} is not included for DR resource {resource_name}"
    resource_df = resource_df.loc[:, scenario]
    resource_df = resource_df.reset_index(drop=True)

    # BUG FIX: capture the trimmed frame returned by remove_feb_29 (matches
    # the assignment pattern in make_load_curves); the original discarded it.
    if len(resource_df) == 8784:
        resource_df = remove_feb_29(resource_df)

    return resource_df
def make_load_curves(
    pudl_engine,
    settings,
    pudl_table="load_curves_ferc",
    settings_agg_key="region_aggregations",
):
    """Load hourly demand curves from PUDL, grow, aggregate, and pivot them.

    Parameters
    ----------
    pudl_engine : sqlalchemy.Engine
        Connection used by pandas to read the load-curve table.
    settings : dict
        User-defined parameters from a settings file. Used here for region
        selection/aggregation, load growth, and the optional 'utc_offset' key.
    pudl_table : str, optional
        Name of the database table with hourly load, by default
        "load_curves_ferc".
    settings_agg_key : str, optional
        NOTE(review): this parameter is never used in the body —
        regions_to_keep(settings) presumably reads the aggregation key
        itself; confirm before removing.

    Returns
    -------
    DataFrame
        Wide frame of hourly load: one column per model region, 1-based
        'time_index' as the index, 8760 rows after leap-day removal.
    """
    # IPM regions to keep. Regions not in this list will be dropped from the
    # dataframe
    keep_regions, region_agg_map = regions_to_keep(settings)

    # I'd rather use a sql query and only pull the regions of interest but
    # sqlalchemy doesn't allow table names to be parameterized.
    logger.info("Loading load curves from PUDL")
    load_curves = pd.read_sql_table(
        pudl_table, pudl_engine, columns=["region_id_epaipm", "time_index", "load_mw"]
    )

    load_curves = load_curves.loc[load_curves.region_id_epaipm.isin(keep_regions)]

    # Increase demand to account for load growth
    load_curves = add_load_growth(load_curves, settings)

    # Set a new column "region" to the old column values. Then replace values for any
    # regions that are being aggregated
    load_curves.loc[:, "region"] = load_curves.loc[:, "region_id_epaipm"]
    load_curves.loc[
        load_curves.region_id_epaipm.isin(region_agg_map.keys()), "region"
    ] = load_curves.loc[
        load_curves.region_id_epaipm.isin(region_agg_map.keys()), "region_id_epaipm"
    ].map(region_agg_map)

    logger.info("Aggregating load curves in grouped regions")
    # Sum hourly load across all source regions mapped to each model region.
    load_curves_agg = load_curves.groupby(["region", "time_index"]).sum()

    # Pivot so each region is a column; drop the leftover 'load_mw' level.
    lc_wide = load_curves_agg.unstack(level=0)
    lc_wide.columns = lc_wide.columns.droplevel()

    # 8784 hours means the source year includes Feb 29; trim to 8760.
    if len(lc_wide) == 8784:
        lc_wide = remove_feb_29(lc_wide)

    # Shift load from UTC
    # np.roll wraps hours around the year boundary; offset defaults to 0.
    for col in lc_wide:
        lc_wide[col] = np.roll(lc_wide[col].values, settings.get("utc_offset", 0))

    lc_wide.index.name = "time_index"
    # Make the hourly index 1-based (1..8760) rather than 0-based.
    if lc_wide.index.min() == 0:
        lc_wide.index = lc_wide.index + 1

    return lc_wide
def make_distributed_gen_profiles(pudl_engine, settings):
    """Create 8760 annual generation profiles for distributed generation in
    regions. Uses a distribution loss parameter in the settings file when DG
    generation is defined a fraction of delivered load.

    Parameters
    ----------
    pudl_engine : sqlalchemy.Engine
        A sqlalchemy connection for use by pandas. Needed to create base load
        profiles (only used when a region's method is 'fraction_load').
    settings : dict
        User-defined parameters from a settings file. Uses 'model_year',
        'input_folder', 'distributed_gen_profiles_fn',
        'distributed_gen_method', and 'distributed_gen_values'.

    Returns
    -------
    DataFrame
        Hourly generation profiles for DG resources in each region. Not all
        regions need to be accounted for.

    Raises
    ------
    KeyError
        If the calculation method specified in settings is not 'capacity'
        or 'fraction_load'
    """
    year = settings["model_year"]
    dg_profiles_path = (
        Path(settings["input_folder"]) / settings["distributed_gen_profiles_fn"]
    )

    hourly_norm_profiles = pd.read_csv(dg_profiles_path)
    profile_regions = hourly_norm_profiles.columns

    dg_calc_methods = settings["distributed_gen_method"]
    dg_calc_values = settings["distributed_gen_values"]

    # Dropped the stray f-prefix: this message has no placeholders.
    assert (
        year in dg_calc_values
    ), "The years in settings parameter 'distributed_gen_values' do not match the model years."

    for region in dg_calc_values[year]:
        assert region in set(profile_regions), (
            "The profile regions in settings parameter 'distributed_gen_values' do not\n"
            f"match the regions in {settings['distributed_gen_profiles_fn']} for year {year}"
        )

    # Only build the (expensive) regional load curves if some region needs them.
    if "fraction_load" in dg_calc_methods.values():
        regional_load = make_load_curves(pudl_engine, settings)

    dg_hourly_gen = pd.DataFrame(columns=dg_calc_methods.keys())

    for region, method in dg_calc_methods.items():
        region_norm_profile = hourly_norm_profiles[region]
        region_calc_value = dg_calc_values[year][region]

        if method == "capacity":
            # Scale the normalized profile by installed capacity.
            dg_hourly_gen[region] = calc_dg_capacity_method(
                region_norm_profile, region_calc_value
            )
        elif method == "fraction_load":
            # Scale by a fraction of the region's delivered load.
            region_load = regional_load[region]
            dg_hourly_gen[region] = calc_dg_frac_load_method(
                region_norm_profile, region_calc_value, region_load, settings
            )
        else:
            # BUG FIX: corrected the typo 'capapacity' -> 'capacity' in the
            # user-facing error message.
            raise KeyError(
                "The settings parameter 'distributed_gen_method' can only have key "
                "values of 'capacity' or 'fraction_load' for each region.\n"
                f"The value in your settings file is {method}"
            )

    # BUG FIX: capture the trimmed frame returned by remove_feb_29 (matches
    # the assignment pattern in make_load_curves); the original discarded it.
    if len(dg_hourly_gen) == 8784:
        dg_hourly_gen = remove_feb_29(dg_hourly_gen)

    return dg_hourly_gen