def save_csv(model_data, path, dropna=True):
    """
    If termination condition was not optimal, filters inputs only, and
    warns that results will not be saved.
    """
    os.makedirs(path, exist_ok=False)

    # a MILP model which optimises to within the MIP gap, but does not fully
    # converge on the LP relaxation, may return as 'feasible', not 'optimal'
    if ('termination_condition' not in model_data.attrs
            or model_data.attrs['termination_condition'] in ['optimal', 'feasible']):
        data_vars = model_data.data_vars
    else:
        data_vars = model_data.filter_by_attrs(is_result=0).data_vars
        exceptions.warn(
            'Model termination condition was not optimal, saving inputs only.'
        )

    for var in data_vars:
        in_out = 'results' if model_data[var].attrs['is_result'] else 'inputs'
        out_path = os.path.join(path, '{}_{}.csv'.format(in_out, var))
        series = split_loc_techs(model_data[var], return_as='Series')
        if dropna:
            series = series.dropna()
        series.to_csv(out_path, header=True)
def check_optimality(self):
    termination = self._model._model_data.attrs.get(
        'termination_condition', 'did_not_yet_run')

    if termination not in ['optimal', 'did_not_yet_run']:
        warn('Model termination condition was not optimal. Plotting may fail!')
def save_csv(model_data, path, dropna=True):
    """
    If termination condition was not optimal, filters inputs only, and
    warns that results will not be saved.
    """
    os.makedirs(path, exist_ok=False)

    # a MILP model which optimises to within the MIP gap, but does not fully
    # converge on the LP relaxation, may return as 'feasible', not 'optimal'
    if "termination_condition" not in model_data.attrs or model_data.attrs[
            "termination_condition"] in ["optimal", "feasible"]:
        data_vars = model_data.data_vars
    else:
        data_vars = model_data.filter_by_attrs(is_result=0).data_vars
        exceptions.warn(
            "Model termination condition was not optimal, saving inputs only.")

    for var in data_vars:
        in_out = "results" if model_data[var].attrs["is_result"] else "inputs"
        out_path = os.path.join(path, "{}_{}.csv".format(in_out, var))
        series = model_data[var].to_series()
        if dropna:
            series = series.dropna()
        series.to_csv(out_path, header=True)
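
# A minimal, runnable sketch of the `filter_by_attrs(is_result=0)` filtering used
# in save_csv, on a toy xarray Dataset. The variable names here are illustrative,
# not Calliope's real inputs/results.
import xarray as xr

ds = xr.Dataset({"resource": ("t", [1.0, 2.0]), "energy_cap": ("t", [5.0, 5.0])})
ds["resource"].attrs["is_result"] = 0    # an input
ds["energy_cap"].attrs["is_result"] = 1  # a result

inputs_only = ds.filter_by_attrs(is_result=0)
assert list(inputs_only.data_vars) == ["resource"]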
def load_overrides_from_scenario(config_model, scenario):
    def _get_overrides(scenario_name):
        _overrides = config_model.get_key(
            f"scenarios.{scenario_name}", [scenario_name])
        if isinstance(_overrides, list):
            return _overrides
        else:
            return [_overrides]

    if scenario in config_model.get("scenarios", {}).keys():
        if "," in scenario:
            exceptions.warn(
                f"Scenario name `{scenario}` includes commas that won't be parsed as a list of overrides."
            )
        logger.info("Loading overrides from scenario: {}".format(scenario))
        scenario_list = _get_overrides(scenario)
    else:
        scenario_list = scenario.split(",")

    scenario_overrides = set()
    for override in scenario_list:
        if isinstance(override, dict):
            raise exceptions.ModelError(
                "Scenario definition must be a list of override or other scenario names."
            )
        if override in config_model.get("scenarios", {}).keys():
            scenario_overrides.update(
                load_overrides_from_scenario(config_model, override))
        else:
            scenario_overrides.add(override)

    return list(scenario_overrides)
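
# A minimal, runnable analogue of the recursive scenario expansion above, using
# plain dicts instead of calliope's AttrDict (the `config` contents are
# hypothetical, for illustration only):
def _expand_scenario(config, name):
    # Recursively resolve a scenario name into its set of override names
    definition = config.get("scenarios", {}).get(name)
    if definition is None:
        return {name}  # a bare override name resolves to itself
    resolved = set()
    for entry in (definition if isinstance(definition, list) else [definition]):
        resolved |= _expand_scenario(config, entry)
    return resolved

config = {"scenarios": {"base": ["o1", "o2"], "full": ["base", "o3"]}}
assert _expand_scenario(config, "full") == {"o1", "o2", "o3"}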
def hartigan_n_clusters(X, threshold=10):
    """
    Try clustering using sklearn.cluster.KMeans, for several cluster sizes.
    Using Hartigan's rule, we will return the number of clusters after which
    the benefit of clustering is low.
    """
    def _H_rule(inertia, inertia_plus_one, n_clusters, len_input):
        # see http://www.dcs.bbk.ac.uk/~mirkin/papers/00357_07-216RR_mirkin.pdf
        return ((inertia / inertia_plus_one) - 1) * (len_input - n_clusters - 1)

    len_input = len(X)
    n_clusters = 1
    HK = threshold + 1

    while n_clusters <= len_input and HK > threshold:
        kmeans = sk_cluster.KMeans(n_clusters=n_clusters).fit(X)
        kmeans_plus_one = sk_cluster.KMeans(n_clusters=n_clusters + 1).fit(X)

        inertia = kmeans.inertia_
        inertia_plus_one = kmeans_plus_one.inertia_

        HK = _H_rule(inertia, inertia_plus_one, n_clusters, len_input)

        n_clusters += 1

    if HK > threshold:
        # i.e. we went to the limit where n_clusters = len_input
        exceptions.warn("Based on threshold, number of clusters = number of dates")
        return len_input
    else:
        return n_clusters - 1
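
# Runnable sketch of Hartigan's rule on synthetic data: three well-separated
# blobs should give a large HK statistic up to k = 3 and little benefit beyond.
# (Uses scikit-learn directly; the threshold of 10 mirrors the default above.)
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
X = np.concatenate([rng.normal(loc, 0.1, size=(30, 2)) for loc in (0, 5, 10)])

for k in (1, 2, 3):
    inertia = KMeans(n_clusters=k, n_init=10).fit(X).inertia_
    inertia_plus_one = KMeans(n_clusters=k + 1, n_init=10).fit(X).inertia_
    HK = ((inertia / inertia_plus_one) - 1) * (len(X) - k - 1)
    print(k, HK)  # HK stays large until the 'true' number of clusters is reached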
def solve_model(backend_model, solver, solver_io=None,
                solver_options=None, save_logs=False, **solve_kwargs):
    """
    Solve a Pyomo model using the chosen solver and all necessary solver options

    Returns a Pyomo results object
    """
    opt = SolverFactory(solver, solver_io=solver_io)

    if solver_options:
        for k, v in solver_options.items():
            opt.options[k] = v

    if save_logs:
        solve_kwargs.update({
            'symbolic_solver_labels': True,
            'keepfiles': True
        })
        os.makedirs(save_logs, exist_ok=True)
        TempfileManager.tempdir = save_logs  # Sets log output dir

    if 'warmstart' in solve_kwargs.keys() and solver in ['glpk', 'cbc']:
        exceptions.warn(
            'The chosen solver, {}, does not support warmstart, which may '
            'impact performance.'.format(solver))
        del solve_kwargs['warmstart']

    with redirect_stdout(LogWriter(logger, 'debug', strip=True)):
        with redirect_stderr(LogWriter(logger, 'error', strip=True)):
            results = opt.solve(backend_model, tee=True, **solve_kwargs)

    return results
def get_clusters_kmeans(data, tech=None, timesteps=None, k=None, variables=None):
    """
    Parameters
    ----------
    data : xarray.Dataset
        Should be normalized
    tech : list, optional
        list of strings referring to technologies by which clustering is undertaken.
        If none (default), all technologies within timeseries variables will be used.
    timesteps : list or str, optional
        Subset of the time domain within which to apply clustering.
    k : int, optional
        Number of clusters to create. If none (default), will use Hartigan's rule
        to infer a reasonable number of clusters.
    variables : list, optional
        data variables (e.g. `resource`, `energy_eff`) by whose values the data
        will be clustered. If none (default), all timeseries variables will be used.

    Returns
    -------
    clusters : dataframe
        Indexed by timesteps and with locations as columns, giving cluster
        membership for first timestep of each day.
    centroids

    """
    timesteps_per_day = len(data.attrs['_daily_timesteps'])

    if timesteps is not None:
        data = data.loc[{'timesteps': timesteps}]
    else:
        timesteps = data.timesteps.values

    X = reshape_for_clustering(data, tech, variables)

    if not k:
        k = hartigan_n_clusters(X)
        exceptions.warn('Used Hartigan rule to get {} kmeans clusters'.format(k))
    clustered_data = sk_cluster.KMeans(k).fit(X)

    # Determine the cluster membership of each day
    day_clusters = clustered_data.labels_

    # Create mapping of timesteps to clusters
    clusters = pd.Series(day_clusters, index=timesteps[::timesteps_per_day])

    # Reshape centroids
    centroids = reshape_clustered(clustered_data.cluster_centers_, data, tech, variables)

    # Get inertia, for e.g. checking clustering with Hartigan's rule
    inertia = clustered_data.inertia_

    return clusters, centroids, inertia
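
# Runnable sketch of the scikit-learn attributes consumed above (`labels_`,
# `cluster_centers_`, `inertia_`), on toy data shaped one row per "day":
import numpy as np
from sklearn.cluster import KMeans

X = np.array([[0.0, 0.1], [0.2, 0.0], [5.0, 5.1], [5.2, 4.9]])  # 4 "days"
fitted = KMeans(n_clusters=2, n_init=10).fit(X)

print(fitted.labels_)           # cluster membership of each day, e.g. [0 0 1 1]
print(fitted.cluster_centers_)  # one centroid row per cluster
print(fitted.inertia_)          # within-cluster sum of squares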
def check_optimality(self):
    termination = self._model._model_data.attrs.get(
        "termination_condition", "did_not_yet_run"
    )
    # a MILP model which optimises to within the MIP gap, but does not fully
    # converge on the LP relaxation, may return as 'feasible', not 'optimal'
    if termination not in ["optimal", "did_not_yet_run", "feasible"]:
        warn("Model termination condition was not optimal. Plotting may fail!")
def get_clusters_hierarchical(data, timesteps_per_day, tech=None, max_d=None,
                              k=None, variables=None):
    """
    Parameters
    ----------
    data : xarray.Dataset
        Should be normalized
    timesteps_per_day
    tech : list, optional
        list of strings referring to technologies by which clustering is undertaken.
        If none (default), all technologies within timeseries variables will be used.
    max_d : float or int, optional
        Max distance for returning clusters.
    k : int, optional
        Number of clusters to create. If none (default), will use Hartigan's rule
        to infer a reasonable number of clusters.
    variables : list, optional
        data variables (e.g. `resource`, `energy_eff`) by whose values the data
        will be clustered. If none (default), all timeseries variables will be used.

    Returns
    -------
    clusters
    X
    Z

    """
    X = reshape_for_clustering(data, tech, variables)

    # Generate the linkage matrix
    Z = hierarchy.linkage(X, 'ward')

    if max_d:
        # Get clusters based on maximum distance
        clusters = hierarchy.fcluster(Z, max_d, criterion='distance')
    elif k:
        # Get clusters based on number of desired clusters
        clusters = hierarchy.fcluster(Z, k, criterion='maxclust')
    else:
        k = hartigan_n_clusters(X)
        exceptions.warn('Used Hartigan\'s rule to determine '
                        '{} is a good number of clusters.'.format(k))
        clusters = hierarchy.fcluster(Z, k, criterion='maxclust')

    # Make sure clusters are a pd.Series with a datetime index
    if clusters is not None:
        timesteps = data.coords['timesteps'].values  # All timesteps
        clusters = pd.Series(clusters, index=timesteps[::timesteps_per_day])

    return (clusters, X, Z)
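
# Runnable sketch of the scipy calls used above: build a Ward linkage matrix,
# then cut it either at a distance threshold or at a target cluster count.
import numpy as np
from scipy.cluster import hierarchy

X = np.array([[0.0], [0.1], [5.0], [5.1], [10.0], [10.1]])
Z = hierarchy.linkage(X, 'ward')

by_distance = hierarchy.fcluster(Z, 1.0, criterion='distance')  # cut at max distance
by_count = hierarchy.fcluster(Z, 3, criterion='maxclust')       # cut at <= 3 clusters
print(by_distance, by_count)  # e.g. [1 1 2 2 3 3] for both cuts on this data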
def solve_model(
    backend_model,
    solver,
    solver_io=None,
    solver_options=None,
    save_logs=False,
    opt=None,
    **solve_kwargs,
):
    """
    Solve a Pyomo model using the chosen solver and all necessary solver options

    Returns a Pyomo results object
    """
    if opt is None:
        opt = SolverFactory(solver, solver_io=solver_io)
        if "persistent" in solver:
            solve_kwargs.update({"save_results": False, "load_solutions": False})
            opt.set_instance(backend_model)

    if solver_options:
        for k, v in solver_options.items():
            opt.options[k] = v

    if save_logs:
        solve_kwargs.update({"symbolic_solver_labels": True, "keepfiles": True})
        os.makedirs(save_logs, exist_ok=True)
        TempfileManager.tempdir = save_logs  # Sets log output dir

    if "warmstart" in solve_kwargs.keys() and solver in ["glpk", "cbc"]:
        if solve_kwargs.pop("warmstart") is True:
            exceptions.warn(
                "The chosen solver, {}, does not support warmstart, which may "
                "impact performance.".format(solver))

    with redirect_stdout(LogWriter(logger, "debug", strip=True)):
        with redirect_stderr(LogWriter(logger, "error", strip=True)):
            # Ignore most of gurobipy's logging, as its output is
            # already captured through STDOUT
            logging.getLogger("gurobipy").setLevel(logging.ERROR)
            if "persistent" in solver:
                results = opt.solve(tee=True, **solve_kwargs)
            else:
                results = opt.solve(backend_model, tee=True, **solve_kwargs)

    return results, opt
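
# Hedged usage sketch of the SolverFactory pattern that solve_model() wraps,
# on a trivial LP. Assumes a CBC (or GLPK) binary is installed and on PATH;
# the "seconds" option is CBC's time limit, set key-by-key as in the loop above.
import pyomo.environ as pe
from pyomo.opt import SolverFactory

m = pe.ConcreteModel()
m.x = pe.Var(bounds=(0, 10))
m.obj = pe.Objective(expr=m.x, sense=pe.maximize)

opt = SolverFactory("cbc")   # or "glpk"
opt.options["seconds"] = 60
results = opt.solve(m, tee=False)
print(results.solver.termination_condition)  # expect 'optimal'; x == 10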
def read_netcdf(path):
    """Read model_data from NetCDF file"""
    with xr.open_dataset(path) as model_data:
        model_data.load()

    calliope_version = model_data.attrs.get('calliope_version', False)
    if calliope_version:
        if not str(calliope_version) in __version__:
            exceptions.warn(
                'This model data was created with Calliope version {}, '
                'but you are running {}. Proceed with caution!'.format(
                    calliope_version, __version__)
            )

    # FIXME some checks for consistency
    # use check_dataset from the checks module
    # also check the old checking from 0.5.x

    return model_data
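
# Runnable sketch of the substring-based version check above. Note that
# `str(saved) in running` is a loose test: it treats '0.6' as compatible with
# '0.6.5', but it is not a semantic version comparison.
saved, running = '0.6', '0.6.5'
assert str(saved) in running       # passes: substring match
saved = '0.6.4'
assert str(saved) not in '0.6.5'   # any mismatch triggers the warning path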
def save_csv(model_data, path, dropna=True):
    """
    If termination condition was not optimal, filters inputs only, and
    warns that results will not be saved.
    """
    os.makedirs(path, exist_ok=False)

    if ('termination_condition' not in model_data.attrs
            or model_data.attrs['termination_condition'] == 'optimal'):
        data_vars = model_data.data_vars
    else:
        data_vars = model_data.filter_by_attrs(is_result=0).data_vars
        exceptions.warn(
            'Model termination condition was not optimal, saving inputs only.')

    for var in data_vars:
        in_out = 'results' if model_data[var].attrs['is_result'] else 'inputs'
        out_path = os.path.join(path, '{}_{}.csv'.format(in_out, var))
        series = split_loc_techs(model_data[var], return_as='Series')
        if dropna:
            series = series.dropna()
        series.to_csv(out_path)
def run_operate(model_data, timings, backend, build_only):
    """
    For use when mode is 'operate', to allow the model to be built, edited, and
    iteratively run within Pyomo.
    """
    log_time(logger, timings, 'run_start',
             comment='Backend: starting model run in operational mode')

    defaults = AttrDict.from_yaml_string(model_data.attrs['defaults'])
    run_config = AttrDict.from_yaml_string(model_data.attrs['run_config'])

    operate_params = ['purchased'] + [
        i.replace('_max', '') for i in defaults if i[-4:] == '_max'
    ]

    # Capacity results (from plan mode) can be used as the input to operate mode
    if (any(model_data.filter_by_attrs(is_result=1).data_vars)
            and run_config.get('operation.use_cap_results', False)):
        # Anything with is_result = 1 will be ignored in the Pyomo model
        for varname, varvals in model_data.data_vars.items():
            if varname in operate_params:
                varvals.attrs['is_result'] = 1
                varvals.attrs['operate_param'] = 1
    else:
        cap_max = xr.merge([
            v.rename(k.replace('_max', ''))
            for k, v in model_data.data_vars.items() if '_max' in k
        ])
        cap_equals = xr.merge([
            v.rename(k.replace('_equals', ''))
            for k, v in model_data.data_vars.items() if '_equals' in k
        ])
        caps = cap_max.update(cap_equals)
        for cap in caps.data_vars.values():
            cap.attrs['is_result'] = 1
            cap.attrs['operate_param'] = 1
        model_data.update(caps)

    # Storage initial is carried over between iterations, so must be defined along with storage
    if ('loc_techs_store' in model_data.dims.keys()
            and 'storage_initial' not in model_data.data_vars.keys()):
        model_data['storage_initial'] = xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_store.values],
            dims='loc_techs_store')
        model_data['storage_initial'].attrs['is_result'] = 0
        exceptions.warn(
            'Initial stored energy not defined, set to zero for all '
            'loc::techs in loc_techs_store, for use in iterative optimisation')

    # Operated units is carried over between iterations, so must be defined in a milp model
    if ('loc_techs_milp' in model_data.dims.keys()
            and 'operated_units' not in model_data.data_vars.keys()):
        model_data['operated_units'] = xr.DataArray(
            [0 for loc_tech in model_data.loc_techs_milp.values],
            dims='loc_techs_milp')
        model_data['operated_units'].attrs['is_result'] = 1
        model_data['operated_units'].attrs['operate_param'] = 1
        exceptions.warn(
            'daily operated units not defined, set to zero for all '
            'loc::techs in loc_techs_milp, for use in iterative optimisation')

    comments, warnings, errors = checks.check_operate_params(model_data)
    exceptions.print_warnings_and_raise_errors(warnings=warnings, errors=errors)

    # Initialize our variables
    solver = run_config['solver']
    solver_io = run_config.get('solver_io', None)
    solver_options = run_config.get('solver_options', None)
    save_logs = run_config.get('save_logs', None)
    window = run_config['operation']['window']
    horizon = run_config['operation']['horizon']
    window_to_horizon = horizon - window

    # get the cumulative sum of timestep resolution, to find where we hit our window and horizon
    timestep_cumsum = model_data.timestep_resolution.cumsum('timesteps').to_pandas()
    # get the timesteps at which we start and end our windows
    window_ends = timestep_cumsum.where(
        (timestep_cumsum % window == 0) | (timestep_cumsum == timestep_cumsum[-1]))
    window_starts = timestep_cumsum.where(
        (~np.isnan(window_ends.shift(1))) | (timestep_cumsum == timestep_cumsum[0])
    ).dropna()

    window_ends = window_ends.dropna()
    horizon_ends = timestep_cumsum[
        timestep_cumsum.isin(window_ends.values + window_to_horizon)]

    if not any(window_starts):
        raise exceptions.ModelError(
            'Not enough timesteps or incorrect timestep resolution to run in '
            'operational mode with an optimisation window of {}'.format(window))

    # We will only update timeseries parameters
    timeseries_data_vars = [
        k for k, v in model_data.data_vars.items()
        if 'timesteps' in v.dims and v.attrs['is_result'] == 0
    ]

    # Loop through each window, solve over the horizon length, and add result to
    # result_array; we only go as far as the end of the last horizon, which may
    # clip the last bit of data
    result_array = []
    # track whether each iteration finds an optimal solution or not
    terminations = []

    if build_only:
        iterations = [0]
    else:
        iterations = range(len(window_starts))

    for i in iterations:
        start_timestep = window_starts.index[i]

        # Build full model in first instance
        if i == 0:
            warmstart = False
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(logger, timings, 'model_gen_1',
                     comment='Backend: generating initial model')

            backend_model = backend.generate_model(window_model_data)

        # Build the full model in the last instance(s),
        # where number of timesteps is less than the horizon length
        elif i > len(horizon_ends) - 1:
            warmstart = False
            end_timestep = window_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger, timings, 'model_gen_{}'.format(i + 1),
                comment=(
                    'Backend: iteration {}: generating new model for '
                    'end of timeseries, with horizon = {} timesteps'.format(
                        i + 1, window_ends[i] - window_starts[i])))

            backend_model = backend.generate_model(window_model_data)

        # Update relevant Pyomo Params in intermediate instances
        else:
            warmstart = True
            end_timestep = horizon_ends.index[i]
            timesteps = slice(start_timestep, end_timestep)
            window_model_data = model_data.loc[dict(timesteps=timesteps)]

            log_time(
                logger, timings, 'model_gen_{}'.format(i + 1),
                comment='Backend: iteration {}: updating model parameters'.format(i + 1))

            # Pyomo model sees the same timestamps each time, we just change the
            # values associated with those timestamps
            for var in timeseries_data_vars:
                # New values
                var_series = (window_model_data[var].to_series()
                              .dropna().replace('inf', np.inf))
                # Same timestamps
                var_series.index = backend_model.__calliope_model_data['data'][var].keys()
                var_dict = var_series.to_dict()
                # Update pyomo Param with new dictionary
                getattr(backend_model, var).store_values(var_dict)

        if not build_only:
            log_time(logger, timings, 'model_run_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: sending model to solver'.format(i + 1))

            # After iteration 1, warmstart = True, which should speed up the process
            # Note: Warmstart isn't possible with GLPK (dealt with later on)
            _results = backend.solve_model(
                backend_model,
                solver=solver,
                solver_io=solver_io,
                solver_options=solver_options,
                save_logs=save_logs,
                warmstart=warmstart,
            )

            log_time(logger, timings, 'run_solver_exit_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: solver finished running'.format(i + 1))

            # xarray dataset is built for each iteration
            _termination = backend.load_results(backend_model, _results)
            terminations.append(_termination)

            _results = backend.get_result_array(backend_model, model_data)

            # We give back the actual timesteps for this iteration and take a slice
            # equal to the window length
            _results['timesteps'] = window_model_data.timesteps.copy()

            # We always save the window data. Until the last window(s) this will crop
            # the window_to_horizon timesteps. In the last window(s), optimisation will
            # only be occurring over a window length anyway
            _results = _results.loc[dict(timesteps=slice(None, window_ends.index[i]))]
            result_array.append(_results)

            # Set up initial storage for the next iteration
            if 'loc_techs_store' in model_data.dims.keys():
                storage_initial = _results.storage.loc[{
                    'timesteps': window_ends.index[i]
                }].drop('timesteps')
                model_data['storage_initial'].loc[
                    storage_initial.coords] = storage_initial.values
                backend_model.storage_initial.store_values(
                    storage_initial.to_series().dropna().to_dict())

            # Set up total operated units for the next iteration
            if 'loc_techs_milp' in model_data.dims.keys():
                operated_units = _results.operating_units.sum('timesteps').astype(int)
                model_data['operated_units'].loc[{}] += operated_units.values
                backend_model.operated_units.store_values(
                    operated_units.to_series().dropna().to_dict())

            log_time(logger, timings, 'run_solver_exit_{}'.format(i + 1),
                     time_since_run_start=True,
                     comment='Backend: iteration {}: generated solution array'.format(i + 1))

    if build_only:
        results = xr.Dataset()
    else:
        # Concatenate results over the timestep dimension to get a single
        # xarray Dataset of interest
        results = xr.concat(result_array, dim='timesteps')
        if all(i == 'optimal' for i in terminations):
            results.attrs['termination_condition'] = 'optimal'
        elif all(i in ['optimal', 'feasible'] for i in terminations):
            results.attrs['termination_condition'] = 'feasible'
        else:
            results.attrs['termination_condition'] = ','.join(terminations)

        log_time(logger, timings, 'run_solution_returned',
                 time_since_run_start=True,
                 comment='Backend: generated full solution array')

    return results, backend_model
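
# Runnable sketch of the window/horizon bookkeeping in run_operate: a cumulative
# sum of timestep resolutions locates window boundaries, and each horizon end
# sits `horizon - window` hours beyond the corresponding window end.
import pandas as pd

window, horizon = 24, 48
idx = pd.date_range('2005-01-01', periods=96, freq='H')  # 4 days, hourly
timestep_cumsum = pd.Series(1.0, index=idx).cumsum()

window_ends = timestep_cumsum.where(
    (timestep_cumsum % window == 0) | (timestep_cumsum == timestep_cumsum.iloc[-1])
).dropna()
horizon_ends = timestep_cumsum[
    timestep_cumsum.isin(window_ends.values + (horizon - window))
]
print(window_ends.index)   # 23:00 on each day
print(horizon_ends.index)  # one day beyond each window end (where available)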
def add_time_dimension(data, model_run):
    """
    Once all constraints and costs have been loaded into the model dataset, any
    timeseries data is loaded from file and substituted into the model dataset

    Parameters
    ----------
    data : xarray Dataset
        A data structure which has already gone through `constraints_to_dataset`,
        `costs_to_dataset`, and `add_attributes`
    model_run : AttrDict
        Calliope model_run dictionary

    Returns
    -------
    data : xarray Dataset
        A data structure with an additional time dimension to the input dataset,
        with all relevant `file=` and `df=` entries replaced with the correct data.

    """
    key_errors = []
    # 1) Search through every constraint/cost for use of 'file=' or 'df='
    for variable in model_run.timeseries_vars:
        # 2) convert to a Pandas Series to do 'string contains' search
        data_series = data[variable].to_series().dropna()

        # 3) get Series of all uses of 'file=' or 'df=' for this variable (timeseries keys)
        try:
            tskeys = data_series[data_series.str.contains("file=")
                                 | data_series.str.contains("df=")]
        except AttributeError:
            continue

        # 4) If no use of 'file=' or 'df=' then we can be on our way
        if tskeys.empty:
            continue

        # 5) remove all before '=' and split filename and node column
        tskeys = (tskeys.str.split("=").str[1]
                  .str.rsplit(":", 1, expand=True)
                  .reset_index()
                  .rename(columns={0: "source", 1: "column"})
                  .set_index(["source", "column"]))

        # 6) Get all timeseries data from dataframes stored in model_run
        try:
            timeseries_data = model_run.timeseries_data.loc[:, tskeys.index]
        except KeyError:
            key_errors.append(
                f"file:column combinations `{tskeys.index.values}` not found, but are"
                f" requested by parameter `{variable}`.")
            continue

        timeseries_data.columns = pd.MultiIndex.from_frame(tskeys)

        # 7) Add time dimension to the relevant DataArray and update the '='
        # dimensions with the time varying data (static data is just duplicated
        # at each timestep)
        data[variable] = (
            xr.DataArray.from_series(timeseries_data.unstack())
            .reindex(data[variable].coords)
            .fillna(data[variable])
        )

    if key_errors:
        exceptions.print_warnings_and_raise_errors(errors=key_errors)

    # Add timestep_resolution by looking at the time difference between timestep n
    # and timestep n + 1 for all timesteps.
    # Last timestep has no n + 1, so will be NaT (not a time); we ffill this.
    # Time resolution is saved in hours (i.e. nanoseconds / 3600e6)
    data["timestep_resolution"] = (
        data.timesteps.diff("timesteps", label="lower")
        .reindex({"timesteps": data.timesteps})
        .ffill("timesteps")
        .rename("timestep_resolution")
        / pd.Timedelta("1 hour")
    )

    if len(data.timesteps) == 1:
        exceptions.warn(
            "Only one timestep defined. Inferring timestep resolution to be 1 hour"
        )
        data["timestep_resolution"] = data["timestep_resolution"].fillna(1)

    data["timestep_weights"] = xr.DataArray(
        np.ones(len(data.timesteps)), dims=["timesteps"])

    return data
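
# Runnable pandas sketch of the timestep-resolution derivation above: diff the
# time index (the last step has no successor), forward-fill the gap, and convert
# to hours. The xarray version does the same via diff(label='lower')/reindex/ffill.
import pandas as pd

timesteps = pd.date_range('2005-01-01', periods=4, freq='6H')
deltas = pd.Series(timesteps).diff().shift(-1)  # difference to the next timestep
resolution = deltas.ffill() / pd.Timedelta('1 hour')
print(resolution.tolist())  # [6.0, 6.0, 6.0, 6.0]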
def get_clusters(data, func, timesteps_per_day, tech=None, timesteps=None,
                 k=None, variables=None, **kwargs):
    """
    Run a clustering algorithm on the timeseries data supplied. All timeseries
    data is reshaped into one row per day before clustering into similar days.

    Parameters
    ----------
    data : xarray.Dataset
        Should be normalized
    func : str
        'kmeans' or 'hierarchical' for KMeans or Agglomerative clustering, respectively
    timesteps_per_day : int
        Total number of timesteps in a day
    tech : list, optional
        list of strings referring to technologies by which clustering is undertaken.
        If none (default), all technologies within timeseries variables will be used.
    timesteps : list or str, optional
        Subset of the time domain within which to apply clustering.
    k : int, optional
        Number of clusters to create. If none (default), will use Hartigan's rule
        to infer a reasonable number of clusters.
    variables : list, optional
        data variables (e.g. `resource`, `energy_eff`) by whose values the data
        will be clustered. If none (default), all timeseries variables will be used.
    kwargs : dict
        Additional keyword arguments available depend on the `func`.
        For available KMeans kwargs see:
        http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
        For available hierarchical kwargs see:
        http://scikit-learn.org/stable/modules/generated/sklearn.cluster.AgglomerativeClustering.html

    Returns
    -------
    clusters : dataframe
        Indexed by timesteps and with locations as columns, giving cluster
        membership for first timestep of each day.
    clustered_data : sklearn.cluster object
        Result of clustering using sklearn.cluster.KMeans(k).fit(X) or
        sklearn.cluster.AgglomerativeClustering(k).fit(X). Allows user to access
        specific attributes, for detailed statistical analysis.

    """
    if timesteps is not None:
        data = data.loc[{"timesteps": timesteps}]
    else:
        timesteps = data.timesteps.values

    X = reshape_for_clustering(data, tech, variables)

    if func == "kmeans":
        if not k:
            k = hartigan_n_clusters(X)
            exceptions.warn("Used Hartigan's rule to determine that "
                            "a good number of clusters is {}.".format(k))
        clustered_data = sk_cluster.KMeans(k).fit(X)

    elif func == "hierarchical":
        if not k:
            raise exceptions.ModelError(
                "Cannot undertake hierarchical clustering without a predefined "
                "number of clusters (k)")
        clustered_data = sk_cluster.AgglomerativeClustering(k).fit(X)

    # Determine the cluster membership of each day
    day_clusters = clustered_data.labels_

    # Create mapping of timesteps to clusters
    clusters = pd.Series(day_clusters, index=timesteps[::timesteps_per_day])

    return clusters, clustered_data
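
# Runnable sketch of the final mapping step above: one cluster label per day,
# indexed by the first timestep of that day (here, 4 timesteps per "day"):
import numpy as np
import pandas as pd

timesteps = pd.date_range('2005-01-01', periods=12, freq='6H')  # 3 days
day_clusters = np.array([0, 1, 0])  # as returned by `clustered_data.labels_`
clusters = pd.Series(day_clusters, index=timesteps[::4])
print(clusters)
# 2005-01-01 00:00:00    0
# 2005-01-02 00:00:00    1
# 2005-01-03 00:00:00    0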
def map_clusters_to_data(data, clusters, how, daily_timesteps,
                         storage_inter_cluster=True):
    """
    Returns a copy of data that has been clustered.

    Parameters
    ----------
    how : str
        How to select data from clusters. Can be mean (centroid) or closest real
        day to the mean (by root mean square error).
    storage_inter_cluster : bool, default=True
        If True, add `datesteps` to model_data, for use in the backend to build
        inter_cluster storage decision variables and constraints
    """
    # FIXME hardcoded time intervals ('1H', '1D')

    # Get all timesteps, not just the first per day
    timesteps_per_day = len(daily_timesteps)
    idx = clusters.index
    new_idx = _timesteps_from_daily_index(idx, daily_timesteps)
    clusters_timeseries = clusters.reindex(new_idx).fillna(method="ffill").astype(int)

    new_data = get_mean_from_clusters(data, clusters_timeseries, timesteps_per_day)
    new_data.attrs = data.attrs

    if how == "mean":
        # Add timestep names by taking the median timestamp from daily clusters...
        # (a random way of doing it, but we want some label to apply)
        timestamps = clusters.groupby(clusters).apply(
            lambda x: x.index[int(len(x.index) / 2)])
        new_data.coords["timesteps"] = _timesteps_from_daily_index(
            pd.Index(timestamps.values), daily_timesteps)

        # Generate weights:
        # weight of each timestep = number of timesteps in this timestep's cluster
        # divided by timesteps per day (since we're grouping days together and
        # a cluster consisting of 1 day = 24 hours should have weight of 1)
        value_counts = clusters_timeseries.value_counts() / timesteps_per_day
        # And turn the index into dates (days)
        value_counts = pd.DataFrame({
            "dates": timestamps,
            "counts": value_counts
        }).set_index("dates")["counts"]

    elif how == "closest":
        new_data, chosen_ts = get_closest_days_from_clusters(
            data, new_data, clusters, daily_timesteps)
        # Deal with the case where more than one cluster has the same closest day.
        # An easy way is to rename the original clusters with the chosen days.
        # So at this point, clusterdays_timeseries maps all timesteps to the day
        # of year of the cluster the timestep belongs to
        clusterdays_timeseries = clusters_timeseries.map(lambda x: chosen_ts[x])
        value_counts = clusterdays_timeseries.value_counts() / timesteps_per_day
        timestamps = pd.DataFrame.from_dict(chosen_ts, orient="index")[0]

        cluster_diff = len(clusters.unique()) - len(timestamps.unique())
        if cluster_diff > 0:
            exceptions.warn(
                "Creating {} fewer clusters as some clusters share the same "
                "closest day".format(cluster_diff))
            timestamps = timestamps.drop_duplicates()
            for cluster, date in timestamps.items():
                clusterdays_timeseries.loc[clusterdays_timeseries == date] = cluster

            clusters = clusterdays_timeseries.resample("1D").mean()

    _clusters = xr.DataArray(
        data=np.full(len(new_data.timesteps.values), np.nan),
        dims="timesteps",
        coords={"timesteps": new_data.timesteps.values},
    )

    for cluster, date in timestamps.items():
        _clusters.loc[date.strftime("%Y-%m-%d")] = cluster

    new_data["timestep_cluster"] = _clusters.astype(int)

    weights = value_counts.reindex(
        _timesteps_from_daily_index(value_counts.index, daily_timesteps)
    ).fillna(method="ffill")
    new_data["timestep_weights"] = xr.DataArray(weights, dims=["timesteps"])

    days = np.unique(new_data.timesteps.to_index().date)
    new_data["timestep_resolution"] = xr.DataArray(
        np.tile(daily_timesteps, len(days)),
        dims=["timesteps"],
        coords={"timesteps": new_data["timesteps"]},
    )

    if storage_inter_cluster:
        clusters.index.name = "datesteps"
        new_data["lookup_datestep_cluster"] = xr.DataArray.from_series(clusters)

    timestamps.index.name = "clusters"
    new_data.coords["clusters"] = timestamps.index

    return new_data
def process_locations(model_config, modelrun_techs):
    """
    Process locations by taking an AttrDict that may include compact keys such
    as ``1,2,3``, and returning an AttrDict with:

    * exactly one key per location with all of its settings
    * fully resolved installed technologies for each location
    * fully expanded transmission links for each location

    Parameters
    ----------
    model_config : AttrDict
    modelrun_techs : AttrDict

    Returns
    -------
    locations : AttrDict
    locations_comments : AttrDict

    """
    techs_in = model_config.techs.copy()
    tech_groups_in = model_config.tech_groups
    locations_in = model_config.locations
    links_in = model_config.get('links', AttrDict())

    allowed_from_file = defaults['file_allowed']

    warnings = []
    errors = []
    locations_comments = AttrDict()

    ##
    # Expand compressed `loc1,loc2,loc3,loc4: ...` definitions
    ##
    locations = AttrDict()
    for key in locations_in:
        if ('--' in key) or (',' in key):
            key_locs = explode_locations(key)
            for subkey in key_locs:
                _set_loc_key(locations, subkey, locations_in[key])
        else:
            _set_loc_key(locations, key, locations_in[key])

    ##
    # Kill any locations that the modeller does not want to exist
    ##
    for loc in list(locations.keys()):
        if not locations[loc].get('exists', True):
            locations.del_key(loc)

    ##
    # Process technologies
    ##
    techs_to_delete = []
    for tech_name in techs_in:
        if not techs_in[tech_name].get('exists', True):
            techs_to_delete.append(tech_name)
            continue
        # Get inheritance chain generated in process_techs()
        inheritance_chain = modelrun_techs[tech_name].inheritance

        # Get and save list of required_constraints from base technology
        base_tech = inheritance_chain[-1]
        rq = model_config.tech_groups[base_tech].required_constraints
        # locations[loc_name].techs[tech_name].required_constraints = rq
        techs_in[tech_name].required_constraints = rq

    # Kill any techs that the modeller does not want to exist
    for tech_name in techs_to_delete:
        del techs_in[tech_name]

    ##
    # Fully expand all installed technologies for the location,
    # filling in any undefined parameters from defaults
    ##
    location_techs_to_delete = []

    for loc_name, loc in locations.items():

        if 'techs' not in loc:
            # Mark this as a transmission-only node if it does not allow
            # any technologies
            locations[loc_name].transmission_node = True
            locations_comments.set_key(
                '{}.transmission_node'.format(loc_name),
                'Automatically inserted: specifies that this node is '
                'a transmission-only node.'
            )
            continue  # No need to process any technologies at this node

        for tech_name in loc.techs:
            if tech_name in techs_to_delete:
                # Techs that were removed need not be further considered
                continue

            if not isinstance(locations[loc_name].techs[tech_name], dict):
                locations[loc_name].techs[tech_name] = AttrDict()

            # Starting at top of the inheritance chain, for each level,
            # check if the level has location-specific group settings
            # and keep merging together the settings, overwriting as we
            # go along.
            tech_settings = AttrDict()
            for parent in reversed(modelrun_techs[tech_name].inheritance):
                # Does the parent group have model-wide settings?
                tech_settings.union(tech_groups_in[parent], allow_override=True)

                # Does the parent group have location-specific settings?
                if ('tech_groups' in locations[loc_name]
                        and parent in locations[loc_name].tech_groups):
                    tech_settings.union(
                        locations[loc_name].tech_groups[parent],
                        allow_override=True)

            # Now overwrite with the tech's own model-wide
            # and location-specific settings
            tech_settings.union(techs_in[tech_name], allow_override=True)
            if tech_name in locations[loc_name].techs:
                tech_settings.union(
                    locations[loc_name].techs[tech_name],
                    allow_override=True)

            tech_settings = cleanup_undesired_keys(tech_settings)

            # Resolve columns in filename if necessary
            file_configs = [
                i for i in tech_settings.keys_nested()
                if (isinstance(tech_settings.get_key(i), str)
                    and 'file=' in tech_settings.get_key(i))
            ]
            for config_key in file_configs:
                if config_key.split('.')[-1] not in allowed_from_file:
                    # Allow any custom settings that end with _time_varying
                    # FIXME: add this to docs
                    if config_key.endswith('_time_varying'):
                        warn('Using custom constraint '
                             '{} with time-varying data.'.format(config_key))
                    else:
                        raise ModelError('`file=` not allowed in {}'.format(config_key))
                config_value = tech_settings.get_key(config_key, '')
                if ':' not in config_value:
                    config_value = '{}:{}'.format(config_value, loc_name)
                    tech_settings.set_key(config_key, config_value)

            tech_settings = compute_depreciation_rates(
                tech_name, tech_settings, warnings, errors)

            # Now merge the tech settings into the location-specific
            # tech dict -- but if a tech specifies ``exists: false``,
            # we kill it at this location
            if not tech_settings.get('exists', True):
                location_techs_to_delete.append(
                    '{}.techs.{}'.format(loc_name, tech_name))
            else:
                locations[loc_name].techs[tech_name].union(
                    tech_settings, allow_override=True
                )

    for k in location_techs_to_delete:
        locations.del_key(k)

    # Generate all transmission links
    processed_links = AttrDict()
    for link in links_in:
        loc_from, loc_to = link.split(',')
        # Skip this link entirely if it has been told not to exist
        if not links_in[link].get('exists', True):
            continue
        # Also skip this link - and warn about it - if it links to a
        # now-removed location
        if (loc_from not in locations.keys()
                or loc_to not in locations.keys()):
            warnings.append(
                'Not building the link {},{} because one or both of its '
                'locations have been removed from the model by setting '
                '``exists: false``'.format(loc_from, loc_to)
            )
            continue
        processed_transmission_techs = AttrDict()
        for tech_name in links_in[link].techs:
            # Skip techs that have been told not to exist
            # for this particular link
            if not links_in[link].get_key('techs.{}.exists'.format(tech_name), True):
                continue
            if tech_name not in processed_transmission_techs:
                tech_settings = AttrDict()
                # Combine model-wide settings from all parent groups
                for parent in reversed(modelrun_techs[tech_name].inheritance):
                    tech_settings.union(tech_groups_in[parent], allow_override=True)

                # Now overwrite with the tech's own model-wide settings
                tech_settings.union(techs_in[tech_name], allow_override=True)

                # Add link-specific constraint overrides
                if links_in[link].techs[tech_name]:
                    tech_settings.union(
                        links_in[link].techs[tech_name],
                        allow_override=True
                    )

                tech_settings = cleanup_undesired_keys(tech_settings)

                tech_settings = process_per_distance_constraints(
                    tech_name, tech_settings, locations, locations_comments,
                    loc_from, loc_to)
                tech_settings = compute_depreciation_rates(
                    tech_name, tech_settings, warnings, errors)
                processed_transmission_techs[tech_name] = tech_settings
            else:
                tech_settings = processed_transmission_techs[tech_name]

            processed_links.set_key(
                '{}.links.{}.techs.{}'.format(loc_from, loc_to, tech_name),
                tech_settings.copy()
            )
            processed_links.set_key(
                '{}.links.{}.techs.{}'.format(loc_to, loc_from, tech_name),
                tech_settings.copy()
            )

            # If this is a one-way link, we set the constraints for energy_prod
            # and energy_con accordingly on both parts of the link
            if tech_settings.get_key('constraints.one_way', False):
                processed_links.set_key(
                    '{}.links.{}.techs.{}.constraints.energy_prod'.format(
                        loc_from, loc_to, tech_name),
                    False)
                processed_links.set_key(
                    '{}.links.{}.techs.{}.constraints.energy_con'.format(
                        loc_to, loc_from, tech_name),
                    False)

    locations.union(processed_links, allow_override=True)

    return locations, locations_comments, list(set(warnings)), list(set(errors))
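
# Hedged, runnable sketch of the compact-location expansion handled by
# `explode_locations` above (the helper itself is defined elsewhere in Calliope;
# this re-implementation covers only the `a--c` integer-range and `x,y` list forms):
def _explode(key):
    locs = []
    for part in key.split(','):
        if '--' in part:
            start, end = part.split('--')
            locs.extend(str(i) for i in range(int(start), int(end) + 1))
        else:
            locs.append(part)
    return locs

assert _explode('1--3,region1') == ['1', '2', '3', 'region1']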
def rerun_pyomo_model(model_data, run_config, backend_model):
    """
    Rerun the Pyomo backend, perhaps after updating a parameter value,
    (de)activating a constraint/objective or updating run options in the model
    model_data object (e.g. `run.solver`).

    Returns
    -------
    new_model : calliope.Model
        New calliope model, including both inputs and results,
        but no backend interface.
    """
    backend_model.__calliope_run_config = run_config

    if run_config["mode"] != "plan":
        raise exceptions.ModelError(
            "Cannot rerun the backend in {} run mode. Only `plan` mode is "
            "possible.".format(run_config["mode"]))

    timings = {}
    log_time(logger, timings, "model_creation")

    results, backend_model = backend_run.run_plan(
        model_data,
        run_config,
        timings,
        run_pyomo,
        build_only=False,
        backend_rerun=backend_model,
    )

    inputs = access_pyomo_model_inputs(backend_model)

    # Add additional post-processed result variables to results
    if results.attrs.get("termination_condition", None) in ["optimal", "feasible"]:
        results = postprocess_model_results(
            results, model_data.reindex(results.coords), timings)

    for key, var in results.data_vars.items():
        var.attrs["is_result"] = 1

    for key, var in inputs.data_vars.items():
        var.attrs["is_result"] = 0

    new_model_data = xr.merge((results, inputs))
    new_model_data.attrs.update(model_data.attrs)
    new_model_data.attrs.update(results.attrs)

    # Only add coordinates from the original model_data that don't already exist
    new_coords = [
        i for i in model_data.coords.keys()
        if i not in new_model_data.coords.keys()
    ]
    new_model_data = new_model_data.update(model_data[new_coords])

    # Reorganise the coordinates so that model data and new model data share
    # the same order of items in each dimension
    new_model_data = new_model_data.reindex(model_data.coords)

    exceptions.warn(
        "The results of rerunning the backend model are only available within "
        "the Calliope model returned by this function call.")

    new_calliope_model = calliope.Model(config=None, model_data=new_model_data)
    new_calliope_model._timings = timings

    return new_calliope_model
def run_plan(
    model_data,
    run_config,
    timings,
    backend,
    build_only,
    backend_rerun=False,
    allow_warmstart=False,
    persistent=True,
    opt=None,
):
    log_time(logger, timings, "run_start", comment="Backend: starting model run")

    warmstart = False
    if not backend_rerun:
        backend_model = backend.generate_model(model_data)
        log_time(
            logger,
            timings,
            "run_backend_model_generated",
            time_since_run_start=True,
            comment="Backend: model generated",
        )
    else:
        backend_model = backend_rerun
        if allow_warmstart:
            warmstart = True

    run_config = UpdateObserverDict(
        initial_yaml_string=model_data.attrs["run_config"],
        name="run_config",
        observer=model_data,
    )
    solver = run_config["solver"]
    solver_io = run_config.get("solver_io", None)
    solver_options = run_config.get("solver_options", None)
    save_logs = run_config.get("save_logs", None)

    if build_only:
        results = xr.Dataset()
    else:
        if "persistent" in solver and persistent is False:
            exceptions.warn(
                f"The chosen solver, `{solver}`, will not be used in this run. "
                f"`{solver.replace('_persistent', '')}` will be used instead.")
            solver = solver.replace("_persistent", "")
        log_time(
            logger,
            timings,
            "run_solver_start",
            comment="Backend: sending model to solver",
        )

        backend_results, opt = backend.solve_model(
            backend_model,
            solver=solver,
            solver_io=solver_io,
            solver_options=solver_options,
            save_logs=save_logs,
            warmstart=warmstart,
            opt=opt,
        )

        log_time(
            logger,
            timings,
            "run_solver_exit",
            time_since_run_start=True,
            comment="Backend: solver finished running",
        )

        termination = backend.load_results(backend_model, backend_results, opt)

        log_time(logger, timings, "run_results_loaded",
                 comment="Backend: loaded results")

        if termination in ["optimal", "feasible"]:
            results = backend.get_result_array(backend_model, model_data)
            results.attrs["termination_condition"] = termination
            if "persistent" in opt.name and persistent is True:
                results.attrs["objective_function_value"] = opt.get_model_attr("ObjVal")
            else:
                results.attrs["objective_function_value"] = backend_model.obj()
        else:
            results = xr.Dataset(attrs={"termination_condition": termination})

        log_time(
            logger,
            timings,
            "run_solution_returned",
            time_since_run_start=True,
            comment="Backend: generated solution array",
        )

    return results, backend_model, opt
def generate_constraint_sets(model_run): """ Generate loc-tech sets for a given pre-processed ``model_run`` Parameters ---------- model_run : AttrDict """ sets = model_run.sets # From here on, everything is a `key=value` pair within a dictionary constraint_sets = dict() # energy_balance.py constraint_sets["loc_carriers_system_balance_constraint"] = sets.loc_carriers constraint_sets[ "loc_techs_balance_supply_constraint" ] = sets.loc_techs_finite_resource_supply constraint_sets[ "loc_techs_balance_demand_constraint" ] = sets.loc_techs_finite_resource_demand constraint_sets[ "loc_techs_resource_availability_supply_plus_constraint" ] = sets.loc_techs_finite_resource_supply_plus constraint_sets[ "loc_techs_balance_transmission_constraint" ] = sets.loc_techs_transmission constraint_sets[ "loc_techs_balance_supply_plus_constraint" ] = sets.loc_techs_supply_plus constraint_sets["loc_techs_balance_storage_constraint"] = sets.loc_techs_storage if model_run.run.cyclic_storage is True: constraint_sets["loc_techs_storage_initial_constraint"] = [ i for i in sets.loc_techs_store if constraint_exists(model_run, i, "constraints.storage_initial") is not None ] constraint_sets["loc_techs_storage_discharge_depth"] = [ i for i in sets.loc_techs_store if constraint_exists(model_run, i, "constraints.storage_discharge_depth") ] constraint_sets["carriers_reserve_margin_constraint"] = [ i for i in sets.carriers if i in model_run.model.get_key("reserve_margin", {}).keys() ] # clustering-specific balance constraints if model_run.model.get_key( "time.function", None ) == "apply_clustering" and model_run.model.get_key( "time.function_options.storage_inter_cluster", True ): set_name = "loc_techs_balance_storage_inter_cluster_constraint" constraint_sets[set_name] = sets.loc_techs_store # costs.py constraint_sets["loc_techs_cost_constraint"] = sets.loc_techs_cost constraint_sets[ "loc_techs_cost_investment_constraint" ] = sets.loc_techs_investment_cost constraint_sets["loc_techs_cost_var_constraint"] = [ i for i in sets.loc_techs_om_cost if i not in sets.loc_techs_conversion_plus + sets.loc_techs_conversion ] # export.py constraint_sets["loc_carriers_update_system_balance_constraint"] = [ i for i in sets.loc_carriers if sets.loc_techs_export and any( [ "{0}::{2}".format(*j.split("::")) == i for j in sets.loc_tech_carriers_export ] ) ] constraint_sets[ "loc_tech_carriers_export_balance_constraint" ] = sets.loc_tech_carriers_export constraint_sets["loc_techs_update_costs_var_constraint"] = [ i for i in sets.loc_techs_om_cost if i in sets.loc_techs_export ] constraint_sets["loc_tech_carriers_export_max_constraint"] = [ i for i in sets.loc_tech_carriers_export if constraint_exists(model_run, i.rsplit("::", 1)[0], "constraints.export_cap") is not None ] # capacity.py constraint_sets["loc_techs_storage_capacity_constraint"] = [ i for i in sets.loc_techs_store if i not in sets.loc_techs_milp ] constraint_sets["loc_techs_energy_capacity_storage_constraint_old"] = [ i for i in sets.loc_techs_store if constraint_exists(model_run, i, "constraints.charge_rate") ] constraint_sets["loc_techs_energy_capacity_storage_equals_constraint"] = [ i for i in sets.loc_techs_store if constraint_exists( model_run, i, "constraints.energy_cap_per_storage_cap_equals" ) ] constraint_sets["loc_techs_energy_capacity_storage_min_constraint"] = [ i for i in sets.loc_techs_store if constraint_exists(model_run, i, "constraints.energy_cap_per_storage_cap_min") and not constraint_exists( model_run, i, "constraints.energy_cap_per_storage_cap_equals" ) ] 
constraint_sets["loc_techs_energy_capacity_storage_max_constraint"] = [ i for i in sets.loc_techs_store if constraint_exists(model_run, i, "constraints.energy_cap_per_storage_cap_max") and not constraint_exists( model_run, i, "constraints.energy_cap_per_storage_cap_equals" ) ] constraint_sets["loc_techs_resource_capacity_constraint"] = [ i for i in sets.loc_techs_finite_resource_supply_plus if any( [ constraint_exists(model_run, i, "constraints.resource_cap_equals"), constraint_exists(model_run, i, "constraints.resource_cap_max"), constraint_exists(model_run, i, "constraints.resource_cap_min"), ] ) ] constraint_sets["loc_techs_resource_capacity_equals_energy_capacity_constraint"] = [ i for i in sets.loc_techs_finite_resource_supply_plus if constraint_exists(model_run, i, "constraints.resource_cap_equals_energy_cap") ] constraint_sets["loc_techs_resource_area_constraint"] = sets.loc_techs_area constraint_sets["loc_techs_resource_area_per_energy_capacity_constraint"] = [ i for i in sets.loc_techs_area if constraint_exists(model_run, i, "constraints.resource_area_per_energy_cap") is not None ] constraint_sets["locs_resource_area_capacity_per_loc_constraint"] = [ i for i in sets.locs if model_run.locations[i].get_key("available_area", None) is not None and sets.loc_techs_area ] constraint_sets["loc_techs_energy_capacity_constraint"] = [ i for i in sets.loc_techs if i not in sets.loc_techs_milp + sets.loc_techs_purchase ] constraint_sets["techs_energy_capacity_systemwide_constraint"] = [ i for i in sets.techs if model_run.get_key( "techs.{}.constraints.energy_cap_max_systemwide".format(i), None ) or model_run.get_key( "techs.{}.constraints.energy_cap_equals_systemwide".format(i), None ) ] # dispatch.py constraint_sets["loc_tech_carriers_carrier_production_max_constraint"] = [ i for i in sets.loc_tech_carriers_prod if i not in sets.loc_tech_carriers_conversion_plus and i.rsplit("::", 1)[0] not in sets.loc_techs_milp ] constraint_sets["loc_tech_carriers_carrier_production_min_constraint"] = [ i for i in sets.loc_tech_carriers_prod if i not in sets.loc_tech_carriers_conversion_plus and constraint_exists( model_run, i.rsplit("::", 1)[0], "constraints.energy_cap_min_use" ) and i.rsplit("::", 1)[0] not in sets.loc_techs_milp ] constraint_sets["loc_tech_carriers_carrier_consumption_max_constraint"] = [ i for i in sets.loc_tech_carriers_con if i.rsplit("::", 1)[0] in sets.loc_techs_demand + sets.loc_techs_storage + sets.loc_techs_transmission and i.rsplit("::", 1)[0] not in sets.loc_techs_milp ] constraint_sets["loc_techs_resource_max_constraint"] = sets.loc_techs_supply_plus constraint_sets["loc_tech_carriers_ramping_constraint"] = [ i for i in sets.loc_tech_carriers_prod if i.rsplit("::", 1)[0] in sets.loc_techs_ramping ] # clustering-specific dispatch constraints if model_run.model.get_key( "time.function", None ) == "apply_clustering" and model_run.model.get_key( "time.function_options.storage_inter_cluster", True ): constraint_sets["loc_techs_storage_intra_max_constraint"] = sets.loc_techs_store constraint_sets["loc_techs_storage_intra_min_constraint"] = sets.loc_techs_store constraint_sets["loc_techs_storage_inter_max_constraint"] = sets.loc_techs_store constraint_sets["loc_techs_storage_inter_min_constraint"] = sets.loc_techs_store else: constraint_sets["loc_techs_storage_max_constraint"] = sets.loc_techs_store # milp.py constraint_sets["loc_techs_unit_commitment_milp_constraint"] = sets.loc_techs_milp constraint_sets["loc_techs_unit_capacity_milp_constraint"] = sets.loc_techs_milp 
constraint_sets["loc_tech_carriers_carrier_production_max_milp_constraint"] = [ i for i in sets.loc_tech_carriers_prod if i not in sets.loc_tech_carriers_conversion_plus and i.rsplit("::", 1)[0] in sets.loc_techs_milp ] constraint_sets[ "loc_techs_carrier_production_max_conversion_plus_milp_constraint" ] = [i for i in sets.loc_techs_conversion_plus if i in sets.loc_techs_milp] constraint_sets["loc_tech_carriers_carrier_production_min_milp_constraint"] = [ i for i in sets.loc_tech_carriers_prod if i not in sets.loc_tech_carriers_conversion_plus and constraint_exists( model_run, i.rsplit("::", 1)[0], "constraints.energy_cap_min_use" ) and i.rsplit("::", 1)[0] in sets.loc_techs_milp ] constraint_sets[ "loc_techs_carrier_production_min_conversion_plus_milp_constraint" ] = [ i for i in sets.loc_techs_conversion_plus if constraint_exists(model_run, i, "constraints.energy_cap_min_use") and i in sets.loc_techs_milp ] constraint_sets["loc_tech_carriers_carrier_consumption_max_milp_constraint"] = [ i for i in sets.loc_tech_carriers_con if i.rsplit("::", 1)[0] in sets.loc_techs_demand + sets.loc_techs_storage + sets.loc_techs_transmission and i.rsplit("::", 1)[0] in sets.loc_techs_milp ] constraint_sets["loc_techs_energy_capacity_units_milp_constraint"] = [ i for i in sets.loc_techs_milp if constraint_exists(model_run, i, "constraints.energy_cap_per_unit") is not None ] constraint_sets["loc_techs_storage_capacity_units_milp_constraint"] = [ i for i in sets.loc_techs_milp if i in sets.loc_techs_store ] constraint_sets["loc_techs_energy_capacity_max_purchase_milp_constraint"] = [ i for i in sets.loc_techs_purchase if ( constraint_exists(model_run, i, "constraints.energy_cap_equals") is not None or ( constraint_exists(model_run, i, "constraints.energy_cap_max") is not None and constraint_exists(model_run, i, "constraints.energy_cap_max") != np.inf ) ) ] constraint_sets["loc_techs_energy_capacity_min_purchase_milp_constraint"] = [ i for i in sets.loc_techs_purchase if ( not constraint_exists(model_run, i, "constraints.energy_cap_equals") and constraint_exists(model_run, i, "constraints.energy_cap_min") ) ] constraint_sets["loc_techs_storage_capacity_max_purchase_milp_constraint"] = [ i for i in set(sets.loc_techs_purchase).intersection(sets.loc_techs_store) if ( constraint_exists(model_run, i, "constraints.storage_cap_equals") is not None or ( constraint_exists(model_run, i, "constraints.storage_cap_max") is not None and constraint_exists(model_run, i, "constraints.storage_cap_max") != np.inf ) ) ] constraint_sets["loc_techs_storage_capacity_min_purchase_milp_constraint"] = [ i for i in set(sets.loc_techs_purchase).intersection(sets.loc_techs_store) if ( not constraint_exists(model_run, i, "constraints.storage_cap_equals") and constraint_exists(model_run, i, "constraints.storage_cap_min") ) ] constraint_sets["loc_techs_update_costs_investment_units_milp_constraint"] = [ i for i in sets.loc_techs_milp if i in sets.loc_techs_investment_cost and any( constraint_exists(model_run, i, "costs.{}.purchase".format(j)) for j in model_run.sets.costs ) ] # loc_techs_purchase technologies only exist because they have defined a purchase cost constraint_sets[ "loc_techs_update_costs_investment_purchase_milp_constraint" ] = sets.loc_techs_purchase constraint_sets["techs_unit_capacity_systemwide_milp_constraint"] = [ i for i in sets.techs if model_run.get_key( "techs.{}.constraints.units_max_systemwide".format(i), None ) or model_run.get_key( "techs.{}.constraints.units_equals_systemwide".format(i), None ) ] 
constraint_sets[
    "loc_techs_asynchronous_prod_con_milp_constraint"
] = sets.loc_techs_asynchronous_prod_con

# conversion.py
constraint_sets[
    "loc_techs_balance_conversion_constraint"
] = sets.loc_techs_conversion
constraint_sets[
    "loc_techs_cost_var_conversion_constraint"
] = sets.loc_techs_om_cost_conversion

# conversion_plus.py
constraint_sets[
    "loc_techs_balance_conversion_plus_primary_constraint"
] = sets.loc_techs_conversion_plus
constraint_sets["loc_techs_carrier_production_max_conversion_plus_constraint"] = [
    i for i in sets.loc_techs_conversion_plus if i not in sets.loc_techs_milp
]
constraint_sets["loc_techs_carrier_production_min_conversion_plus_constraint"] = [
    i
    for i in sets.loc_techs_conversion_plus
    if constraint_exists(model_run, i, "constraints.energy_cap_min_use")
    and i not in sets.loc_techs_milp
]
constraint_sets[
    "loc_techs_cost_var_conversion_plus_constraint"
] = sets.loc_techs_om_cost_conversion_plus
constraint_sets[
    "loc_techs_balance_conversion_plus_in_2_constraint"
] = sets.loc_techs_in_2
constraint_sets[
    "loc_techs_balance_conversion_plus_in_3_constraint"
] = sets.loc_techs_in_3
constraint_sets[
    "loc_techs_balance_conversion_plus_out_2_constraint"
] = sets.loc_techs_out_2
constraint_sets[
    "loc_techs_balance_conversion_plus_out_3_constraint"
] = sets.loc_techs_out_3

# network.py
constraint_sets[
    "loc_techs_symmetric_transmission_constraint"
] = sets.loc_techs_transmission

# policy.py
for sense in ["min", "max", "equals"]:
    constraint_sets[f"techlists_group_share_energy_cap_{sense}_constraint"] = [
        i
        for i in sets.techlists
        if f"energy_cap_{sense}"
        in model_run.model.get_key("group_share.{}".format(i), {}).keys()
    ]
    constraint_sets[
        f"techlists_carrier_group_share_carrier_prod_{sense}_constraint"
    ] = [
        i + "::" + carrier
        for i in sets.techlists
        if f"carrier_prod_{sense}"
        in model_run.model.get_key("group_share.{}".format(i), {}).keys()
        for carrier in sets.carriers
        if carrier
        in model_run.model.get_key(
            f"group_share.{i}.carrier_prod_{sense}", {}
        ).keys()
    ]

# group.py
group_constraints = {
    name: data
    for name, data in model_run["group_constraints"].items()
    if data.get("exists", True)
}
constraint_sets["group_constraints"] = set()
for group_constraint_name, group_constraint in group_constraints.items():
    tech_groups = [
        [
            k
            for k, v in checks.DEFAULTS.tech_groups.items()
            if i in v["allowed_group_constraints"]
        ]
        for i in group_constraint.keys()
        if i not in ["techs", "locs", "exists"]
    ]
    allowed_tech_groups = set(tech_groups[0]).intersection(*tech_groups)
    allowed_techs = sum(
        [sets["techs_{}".format(i)] for i in allowed_tech_groups], []
    )
    techs = group_constraint.get("techs", allowed_techs)
    locs = group_constraint.get("locs", sets["locs"])

    # If there are transmission techs, keep only those that link to allowed locations
    techs = [i for i in techs if ":" not in i or i.split(":")[-1] in locs]
    trans_techs = set(techs).intersection(sets["techs_transmission_names"])
    for i in trans_techs:
        techs += [i + ":" + j for j in locs]
        techs.remove(i)

    # If the group constraint defines its own techs, remove those that are not allowed
    techs = list(set(techs).intersection(allowed_techs))

    # All possible loc_techs for this constraint
    loc_techs_all = list(
        set(concat_iterable([(l, t) for l, t in product(locs, techs)], ["::"]))
    )

    # Some of these loc_techs may not exist in the model itself,
    # so we filter against the actually existing loc_techs
    loc_techs = [i for i in loc_techs_all if i in sets.loc_techs]

    default_group_config = checks.DEFAULTS.group_constraints.default_group
    _constraints = {
        k: v
        for k, v in group_constraint.items()
        if k not in ["locs", "techs", "exists"]
    }

    if any(
        isinstance(default_group_config.get(_constraint, False), dict)
        and "default_carrier" in default_group_config[_constraint].keys()
        for _constraint in _constraints.keys()
    ):
        if len(_constraints) > 1:
            raise exceptions.ModelError(
                "Can only handle one constraint in a group constraint if one of them is carrier-based"
            )
        _name, _config = list(_constraints.items())[0]
        loc_tech_carrier_dict = _get_carrier_group_constraint_loc_techs(
            loc_techs, locs, _config, _name, sets, constraint_sets
        )
        if any(len(val) == 0 for val in loc_tech_carrier_dict.values()):
            exceptions.warn(
                f"Constraint group `{group_constraint_name}` will be completely ignored since there are no valid location::technology::carrier combinations"
            )
            continue
        else:
            for key, loc_tech_carriers in loc_tech_carrier_dict.items():
                constraint_sets[
                    key.format(group_constraint_name)
                ] = loc_tech_carriers
    else:
        if len(loc_techs) == 0:
            exceptions.warn(
                f"Constraint group `{group_constraint_name}` will be completely ignored since there are no valid location::technology combinations"
            )
            # skip this group constraint but keep processing the others
            continue
        constraint_sets[
            "group_constraint_loc_techs_{}".format(group_constraint_name)
        ] = loc_techs
    _add_to_group_constraint_mapping(
        constraint_sets, group_constraint_name, list(_constraints.keys())
    )

constraint_sets["group_constraints"] = list(constraint_sets["group_constraints"])

return constraint_sets
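# Sketch of the loc_tech expansion performed in the group-constraint loop
# above: `concat_iterable(..., ["::"])` behaves like a '::' join over each
# (loc, tech) pair. A minimal stand-in for illustration only (location and
# technology names are hypothetical):
#
#     from itertools import product
#
#     locs, techs = ["region1", "region2"], ["ccgt", "battery"]
#     loc_techs_all = ["::".join(pair) for pair in product(locs, techs)]
#     # ['region1::ccgt', 'region1::battery',
#     #  'region2::ccgt', 'region2::battery']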
def apply_clustering(data, timesteps, clustering_func, how, normalize=True,
                     scale_clusters='mean', storage_inter_cluster=True,
                     model_run=None, **kwargs):
    """
    Apply the given clustering function to the given data.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
    clustering_func : str
        Name of clustering function. Can be `file=....csv:column_name` if
        loading custom clustering. The index of the custom clustering series
        must be the dates of the timeseries. If no column_name is given, the
        CSV file must have only one column of data.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    scale_clusters : str or None, default = 'mean'
        Scale the results of clustering such that the clusters match the
        metric given by scale_clusters. For example, 'mean' scales along each
        loc_tech and variable to match inputs and outputs. Other options for
        matching include 'sum', 'max', and 'min'. If None, no scaling occurs.
    storage_inter_cluster : bool, default = True
        If True, storage can be tracked between the clustered periods
        (passed on to ``clustering.map_clusters_to_data``).
    model_run : AttrDict, optional
        Only required when loading custom clustering from file, to access
        the loaded timeseries data.
    **kwargs : optional
        Arguments passed to clustering_func.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    """
    assert how in ['mean', 'closest']

    daily_timesteps = get_daily_timesteps(data, check_uniformity=True)
    timesteps_per_day = len(daily_timesteps)

    # get a copy of the dataset with only timeseries variables,
    # and get all coordinates of the original dataset, to reinstate later
    data_to_cluster, data_coords = _drop_timestep_vars(data, timesteps)

    data_to_cluster = data_to_cluster.drop_vars(
        ['timestep_weights', 'timestep_resolution'])

    for dim in data_to_cluster.dims:
        data_to_cluster[dim] = data[dim]

    if normalize:
        data_normalized = normalized_copy(data_to_cluster)
    else:
        data_normalized = data_to_cluster

    if 'file=' in clustering_func:
        file = clustering_func.split('=')[1]
        if ':' in file:
            file, column = file.rsplit(':', 1)
        else:
            column = None

        df = model_run.timeseries_data[file]
        if isinstance(df, pd.Series):
            if column is not None:
                exceptions.warn(
                    '{} given as time clustering column, but only one column '
                    'to choose from in {}.'.format(column, file))
            # treat the single column the same as a named DataFrame column,
            # so the per-day checks below apply in all cases
            clusters = df.groupby(pd.Grouper(freq='1D')).unique()
        elif column is None:
            raise exceptions.ModelError(
                'No time clustering column given, but multiple columns found in '
                '{0}. Choose one column and add it to {1} as {1}:name_of_column.'
                .format(file, clustering_func))
        elif column not in df.columns:
            raise KeyError('time clustering column {} not found in {}.'.format(
                column, file))
        else:
            clusters = df.loc[:, column].groupby(pd.Grouper(freq='1D')).unique()

        # Check there weren't instances of more than one cluster assigned to a day
        # or days with no information assigned
        if any([len(i) == 0 for i in clusters.values]):
            raise exceptions.ModelError(
                'Missing cluster days in `{}:{}`.'.format(file, column))
        elif any([len(i) > 1 for i in clusters.values]):
            raise exceptions.ModelError(
                'More than one cluster value assigned to a day in `{}:{}`. '
                'Unique clusters per day: {}'.format(file, column, clusters))
        else:
            clusters.loc[:] = [i[0] for i in clusters.values]

    else:
        result = clustering.get_clusters(
            data_normalized, clustering_func,
            timesteps_per_day=timesteps_per_day, **kwargs)
        clusters = result[0]  # ignore the remaining return values

    data_new = clustering.map_clusters_to_data(
        data_to_cluster, clusters, how=how, daily_timesteps=daily_timesteps,
        storage_inter_cluster=storage_inter_cluster)

    # It's now safe to add the original coordinates back in (preserving all the
    # loc_tech sets that aren't used to index a variable in the DataArray)
    data_new.update(data_coords)

    data_new = _copy_non_t_vars(data, data_new)

    if timesteps is not None:
        data_new = _copy_non_t_vars(data, data_new)
        data_new = _combine_datasets(data.drop_sel(timesteps=timesteps), data_new)
        data_new = _copy_non_t_vars(data, data_new)

    # Scale the new/combined data so that the mean for each (loc_tech, variable)
    # combination matches that from the original data
    data_new_scaled = data_new.copy(deep=True)
    if scale_clusters:
        data_vars_in_t = [
            v for v in data_new.data_vars
            if 'timesteps' in data_new[v].dims
            and 'timestep_' not in v
            and v != 'clusters'
        ]
        for var in data_vars_in_t:
            scale = (getattr(data[var], scale_clusters)(dim='timesteps') /
                     getattr(data_new[var], scale_clusters)(dim='timesteps'))
            data_new_scaled[var] = data_new[var] * scale.fillna(0)

    lookup_clusters(data_new_scaled)

    return data_new_scaled
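# Usage sketch for apply_clustering. The 'kmeans' function name, the file
# and column names, and the `k` keyword are illustrative assumptions; `k`
# is simply forwarded to clustering.get_clusters via **kwargs:
#
#     # cluster into representative days, mapping each cluster to its mean day
#     data_clustered = apply_clustering(
#         data, timesteps=None, clustering_func='kmeans', how='mean', k=10)
#
#     # or load a pre-computed day-to-cluster mapping from file
#     data_clustered = apply_clustering(
#         data, timesteps=None,
#         clustering_func='file=clusters.csv:cluster', how='closest',
#         model_run=model_run)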
def process_locations(model_config, modelrun_techs):
    """
    Process locations by taking an AttrDict that may include compact keys
    such as ``1,2,3``, and returning an AttrDict with:

    * exactly one key per location with all of its settings
    * fully resolved installed technologies for each location
    * fully expanded transmission links for each location

    Parameters
    ----------
    model_config : AttrDict
    modelrun_techs : AttrDict

    Returns
    -------
    locations : AttrDict
    locations_comments : AttrDict

    """
    techs_in = model_config.techs.copy()
    tech_groups_in = model_config.tech_groups
    locations_in = model_config.locations
    links_in = model_config.get('links', AttrDict())

    allowed_from_file = defaults['file_allowed']

    warnings = []
    errors = []
    locations_comments = AttrDict()

    ##
    # Expand compressed `loc1,loc2,loc3,loc4: ...` definitions
    ##
    locations = AttrDict()
    for key in locations_in:
        if ('--' in key) or (',' in key):
            key_locs = explode_locations(key)
            for subkey in key_locs:
                _set_loc_key(locations, subkey, locations_in[key])
        else:
            _set_loc_key(locations, key, locations_in[key])

    ##
    # Kill any locations that the modeller does not want to exist
    ##
    for loc in list(locations.keys()):
        if not locations[loc].get('exists', True):
            locations.del_key(loc)

    ##
    # Process technologies
    ##
    techs_to_delete = []
    for tech_name in techs_in:
        if not techs_in[tech_name].get('exists', True):
            techs_to_delete.append(tech_name)
            continue
        # Get inheritance chain generated in process_techs()
        inheritance_chain = modelrun_techs[tech_name].inheritance

        # Get and save list of required_constraints from base technology
        base_tech = inheritance_chain[-1]
        rq = model_config.tech_groups[base_tech].required_constraints
        # locations[loc_name].techs[tech_name].required_constraints = rq
        techs_in[tech_name].required_constraints = rq

    # Kill any techs that the modeller does not want to exist
    for tech_name in techs_to_delete:
        del techs_in[tech_name]

    ##
    # Fully expand all installed technologies for the location,
    # filling in any undefined parameters from defaults
    ##
    location_techs_to_delete = []

    for loc_name, loc in locations.items():
        if 'techs' not in loc:
            # Mark this as a transmission-only node, since no technologies
            # are allowed at it
            locations[loc_name].transmission_node = True
            locations_comments.set_key(
                '{}.transmission_node'.format(loc_name),
                'Automatically inserted: specifies that this node is '
                'a transmission-only node.')
            continue  # No need to process any technologies at this node

        for tech_name in loc.techs:
            if tech_name in techs_to_delete:
                # Techs that were removed need not be further considered
                continue

            if not isinstance(locations[loc_name].techs[tech_name], dict):
                locations[loc_name].techs[tech_name] = AttrDict()

            # Starting at the top of the inheritance chain, for each level,
            # check if the level has location-specific group settings
            # and keep merging together the settings, overwriting as we
            # go along.
            tech_settings = AttrDict()
            for parent in reversed(modelrun_techs[tech_name].inheritance):
                # Does the parent group have model-wide settings?
                tech_settings.union(tech_groups_in[parent],
                                    allow_override=True)

                # Does the parent group have location-specific settings?
                if ('tech_groups' in locations[loc_name] and
                        parent in locations[loc_name].tech_groups):
                    tech_settings.union(
                        locations[loc_name].tech_groups[parent],
                        allow_override=True)

            # Now overwrite with the tech's own model-wide
            # and location-specific settings
            tech_settings.union(techs_in[tech_name], allow_override=True)
            if tech_name in locations[loc_name].techs:
                tech_settings.union(locations[loc_name].techs[tech_name],
                                    allow_override=True)

            tech_settings = cleanup_undesired_keys(tech_settings)

            # Resolve columns in filename if necessary
            file_configs = [
                i for i in tech_settings.keys_nested()
                if (isinstance(tech_settings.get_key(i), str) and
                    'file=' in tech_settings.get_key(i))
            ]
            for config_key in file_configs:
                if config_key.split('.')[-1] not in allowed_from_file:
                    # Allow any custom settings that end with _time_varying
                    # FIXME: add this to docs
                    if config_key.endswith('_time_varying'):
                        warn('Using custom constraint '
                             '{} with time-varying data.'.format(config_key))
                    else:
                        raise ModelError(
                            '`file=` not allowed in {}'.format(config_key))
                config_value = tech_settings.get_key(config_key, '')
                if ':' not in config_value:
                    config_value = '{}:{}'.format(config_value, loc_name)
                    tech_settings.set_key(config_key, config_value)

            tech_settings = compute_depreciation_rates(tech_name,
                                                       tech_settings,
                                                       warnings, errors)

            # Now merge the tech settings into the location-specific
            # tech dict -- but if a tech specifies ``exists: false``,
            # we kill it at this location
            if not tech_settings.get('exists', True):
                location_techs_to_delete.append('{}.techs.{}'.format(
                    loc_name, tech_name))
            else:
                locations[loc_name].techs[tech_name].union(
                    tech_settings, allow_override=True)

    for k in location_techs_to_delete:
        locations.del_key(k)

    # Generate all transmission links
    processed_links = AttrDict()
    for link in links_in:
        loc_from, loc_to = link.split(',')
        # Skip this link entirely if it has been told not to exist
        if not links_in[link].get('exists', True):
            continue
        # Also skip this link - and warn about it - if it links to a
        # now-nonexistent (because removed) location
        if (loc_from not in locations.keys() or
                loc_to not in locations.keys()):
            warnings.append(
                'Not building the link {},{} because one or both of its '
                'locations have been removed from the model by setting '
                '``exists: false``'.format(loc_from, loc_to))
            continue
        processed_transmission_techs = AttrDict()
        for tech_name in links_in[link].techs:
            # Skip techs that have been told not to exist
            # for this particular link
            if not links_in[link].get_key(
                    'techs.{}.exists'.format(tech_name), True):
                continue
            if tech_name not in processed_transmission_techs:
                tech_settings = AttrDict()
                # Combine model-wide settings from all parent groups
                for parent in reversed(modelrun_techs[tech_name].inheritance):
                    tech_settings.union(tech_groups_in[parent],
                                        allow_override=True)

                # Now overwrite with the tech's own model-wide settings
                tech_settings.union(techs_in[tech_name], allow_override=True)

                # Add link-specific constraint overrides
                if links_in[link].techs[tech_name]:
                    tech_settings.union(links_in[link].techs[tech_name],
                                        allow_override=True)

                tech_settings = cleanup_undesired_keys(tech_settings)

                tech_settings = process_per_distance_constraints(
                    tech_name, tech_settings, locations,
                    locations_comments, loc_from, loc_to)
                tech_settings = compute_depreciation_rates(
                    tech_name, tech_settings, warnings, errors)
                processed_transmission_techs[tech_name] = tech_settings
            else:
                tech_settings = processed_transmission_techs[tech_name]

            processed_links.set_key(
                '{}.links.{}.techs.{}'.format(loc_from, loc_to, tech_name),
tech_settings.copy()) processed_links.set_key( '{}.links.{}.techs.{}'.format(loc_to, loc_from, tech_name), tech_settings.copy()) # If this is a one-way link, we set the constraints for energy_prod # and energy_con accordingly on both parts of the link if tech_settings.get_key('constraints.one_way', False): processed_links.set_key( '{}.links.{}.techs.{}.constraints.energy_prod'.format( loc_from, loc_to, tech_name), False) processed_links.set_key( '{}.links.{}.techs.{}.constraints.energy_con'.format( loc_to, loc_from, tech_name), False) locations.union(processed_links, allow_override=True) return locations, locations_comments, list(set(warnings)), list( set(errors))
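# Illustration of the one-way link handling at the end of process_locations
# (all names hypothetical, not from the source): a link defined as
#
#     links:
#         region1,region2:
#             techs:
#                 hvdc:
#                     constraints:
#                         one_way: true
#
# ends up with `energy_prod: false` on the region1 -> region2 entry and
# `energy_con: false` on the region2 -> region1 entry, which, as I read the
# code above, restricts flow to the defined direction (region1 to region2).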
def _warn_on_infeasibility(): return exceptions.warn( "Infeasible SPORE detected. Please check your model configuration. " "No more SPORES will be generated.")