Example #1
0
def apply_clustering(data,
                     timesteps,
                     clustering_func,
                     how,
                     normalize=True,
                     **kwargs):
    """
    Cluster the time dimension of ``data`` using the named clustering function.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
        If None, the whole time dimension is clustered; otherwise only
        the given subset of timesteps is clustered.
    clustering_func : str
        Name of clustering function.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    **kwargs : optional
        Arguments passed on to ``clustering_func``.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    """

    # Remember every non-time coordinate so it can be restored on the
    # clustered result at the end.
    saved_coords = data.copy().coords
    del saved_coords['timesteps']

    # Cluster either the whole dataset or only the masked subset of timesteps.
    if timesteps is None:
        cluster_input = data
    else:
        cluster_input = data.loc[{'timesteps': timesteps}]

    # Clustering only makes sense for timeseries: drop every variable that
    # has no time dimension, or that is itself timestep metadata.
    non_time_vars = [
        name for name in data.variables
        if 'timesteps' not in data[name].dims or 'timestep_' in name
    ]
    cluster_input = cluster_input.drop(non_time_vars)

    # Re-attach the original dimension coordinates to the trimmed dataset.
    for dim in cluster_input.dims:
        cluster_input[dim] = data[dim]

    cluster_source = normalized_copy(cluster_input) if normalize else cluster_input

    # Resolve the `clustering_func` string into a callable.
    func = plugin_load(clustering_func,
                       builtin_module='calliope.core.time.clustering')

    # Only the cluster assignment (first element) is needed here; anything
    # else the function returns is ignored.
    clusters = func(cluster_source, **kwargs)[0]

    new_data = clustering.map_clusters_to_data(cluster_input,
                                               clusters,
                                               how=how)

    new_data = _copy_non_t_vars(data, new_data)
    if timesteps is not None:
        # Merge the clustered subset back with the untouched timesteps,
        # then re-copy non-time variables over the combined result.
        new_data = _combine_datasets(data.drop(timesteps, dim='timesteps'),
                                     new_data)
        new_data = _copy_non_t_vars(data, new_data)

    # It's now safe to add the original coordinates back in (preserving all
    # the loc_tech sets that aren't used to index a variable in the DataArray)
    new_data.update(saved_coords)

    # Rescale so each (x, y, variable) combination keeps the same mean over
    # time as in the original data.
    rescaled = new_data.copy(deep=True)
    time_vars = [
        v for v in new_data.data_vars
        if 'timesteps' in new_data[v].dims and 'timestep_' not in v
    ]
    for v in time_vars:
        ratio = (data[v].mean(dim='timesteps') /
                 new_data[v].mean(dim='timesteps')).fillna(0)
        rescaled[v] = new_data[v] * ratio

    return rescaled
Example #2
0
File: time.py — Project: suvayu/calliope
def apply_time_clustering(model_data, model_run):
    """
    Apply the user-configured time clustering/masking techniques to a
    Calliope model_data Dataset (after the time dimension has been added,
    before any clustering). See doi: 10.1016/j.apenergy.2017.03.051 for
    applications.

    Available techniques:
    - Clustering timeseries into a selected number of 'representative' days:
        days with similar profiles and daily magnitude are grouped and
        represented by one 'representative' day with a greater weight per
        time step.
    - Masking timeseries, leading to variable timestep length:
        only certain parts of the input are kept at full resolution, with
        the remaining periods clustered into single timesteps (e.g. keep
        high resolution in the week with greatest wind power variability,
        smooth all other timesteps to 12H).
    - Timestep resampling:
        reduce problem size by lowering the resolution of all timeseries
        data (e.g. resample from 1H to 6H timesteps).

    Parameters
    ----------
    model_data : xarray Dataset
        Preprocessed Calliope model_data, as produced using
        `calliope.preprocess.build_model_data`
        and found in model._model_data_original
    model_run : bool
        preprocessed model_run dictionary, as produced by
        Calliope.preprocess_model

    Returns
    -------
    data : xarray Dataset
        Dataset with optimisation parameters as variables, optimisation sets
        as coordinates, and other information in attributes. Time dimension
        updated as per the user-defined techniques in model_run.

    """
    time_config = model_run.model["time"]

    data = model_data.copy(deep=True)

    ##
    # Masking: collect the timesteps that must stay at high resolution
    ##
    timesteps = None
    if "masks" in time_config:
        masks = {}
        # time.masks is a list of {'function': .., 'options': ..} dicts
        for mask_spec in time_config.masks:
            mask_spec = AttrDict(mask_spec)
            mask_func = plugin_load(mask_spec.function,
                                    builtin_module="calliope.time.masks")
            mask_kwargs = mask_spec.get_key("options",
                                            default=AttrDict()).as_dict()
            masks[mask_spec.to_yaml()] = mask_func(data, **mask_kwargs)
        data.attrs["masks"] = masks
        # Dummy Series let pandas concatenate the DatetimeIndexes
        chosen_timesteps = pd.concat(
            [pd.Series(0, index=m) for m in masks.values()]).index
        # timesteps: everything NOT picked by any mask
        timesteps = pd.Index(
            data.timesteps.values).difference(chosen_timesteps)

    ##
    # Resolution adjustment: apply the configured time function, if any
    ##
    if "function" in time_config:
        func = plugin_load(time_config.function,
                           builtin_module="calliope.time.funcs")
        func_kwargs = time_config.get("function_options", AttrDict()).as_dict()
        # File-based clustering needs access to the full model_run
        if "file=" in func_kwargs.get("clustering_func", ""):
            func_kwargs.update({"model_run": model_run})
        data = func(data=data, timesteps=timesteps, **func_kwargs)

    return data
Example #3
0
def apply_clustering(data,
                     timesteps,
                     clustering_func,
                     how,
                     normalize=True,
                     scale_clusters='mean',
                     **kwargs):
    """
    Cluster the time dimension of ``data`` using the named clustering function.

    Parameters
    ----------
    data : xarray.Dataset
    timesteps : pandas.DatetimeIndex or list of timesteps or None
        If None, the whole time dimension is clustered; otherwise only
        the given subset of timesteps is clustered.
    clustering_func : str
        Name of clustering function.
    how : str
        How to map clusters to data. 'mean' or 'closest'.
    normalize : bool, optional
        If True (default), data is normalized before clustering is applied,
        using :func:`~calliope.core.time.funcs.normalized_copy`.
    scale_clusters : str or None, default = 'mean'
        Scale the results of clustering such that the clusters match the
        metric given by scale_clusters. For example, 'mean' scales along each
        loc_tech and variable to match inputs and outputs. Other options for
        matching include 'sum', 'max', and 'min'. If None, no scaling occurs.
    **kwargs : optional
        Arguments passed on to ``clustering_func``.

    Returns
    -------
    data_new_scaled : xarray.Dataset

    """

    assert how in ['mean', 'closest']

    # Per-day resolution pattern; verified to be uniform across all days.
    daily_timesteps = get_daily_timesteps(data, check_uniformity=True)
    timesteps_per_day = len(daily_timesteps)

    # Remember every non-time coordinate so it can be restored on the
    # clustered result at the end.
    saved_coords = data.copy().coords
    del saved_coords['timesteps']

    # Cluster either the whole dataset or only the masked subset of timesteps.
    if timesteps is None:
        cluster_input = data
    else:
        cluster_input = data.loc[{'timesteps': timesteps}]

    # Clustering only makes sense for timeseries: drop every variable that
    # has no time dimension, or that is itself timestep metadata.
    non_time_vars = [
        name for name in data.variables
        if 'timesteps' not in data[name].dims or 'timestep_' in name
    ]
    cluster_input = cluster_input.drop(non_time_vars)

    # Re-attach the original dimension coordinates to the trimmed dataset.
    for dim in cluster_input.dims:
        cluster_input[dim] = data[dim]

    cluster_source = normalized_copy(cluster_input) if normalize else cluster_input

    # Resolve the `clustering_func` string into a callable.
    func = plugin_load(clustering_func,
                       builtin_module='calliope.core.time.clustering')

    # Only the cluster assignment (first element) is needed here; anything
    # else the function returns is ignored.
    clusters = func(cluster_source,
                    timesteps_per_day=timesteps_per_day,
                    **kwargs)[0]

    new_data = clustering.map_clusters_to_data(cluster_input,
                                               clusters,
                                               how=how,
                                               daily_timesteps=daily_timesteps)

    new_data = _copy_non_t_vars(data, new_data)
    if timesteps is not None:
        # Merge the clustered subset back with the untouched timesteps,
        # then re-copy non-time variables over the combined result.
        new_data = _combine_datasets(data.drop(timesteps, dim='timesteps'),
                                     new_data)
        new_data = _copy_non_t_vars(data, new_data)

    # It's now safe to add the original coordinates back in (preserving all
    # the loc_tech sets that aren't used to index a variable in the DataArray)
    new_data.update(saved_coords)

    # Rescale so that the chosen statistic (mean/sum/max/min) of each
    # (loc_tech, variable) combination matches that of the original data.
    rescaled = new_data.copy(deep=True)
    if scale_clusters:
        time_vars = [
            v for v in new_data.data_vars if 'timesteps' in new_data[v].dims
            and 'timestep_' not in v and v != 'clusters'
        ]
        for v in time_vars:
            ratio = (getattr(data[v], scale_clusters)(dim='timesteps') /
                     getattr(new_data[v], scale_clusters)(dim='timesteps'))
            rescaled[v] = new_data[v] * ratio.fillna(0)

    return rescaled
Example #4
0
def apply_time_clustering(model_data, model_run):
    """
    Apply the user-configured time clustering/masking techniques to a
    Calliope model_data Dataset (after the time dimension has been added,
    before any clustering). See doi: 10.1016/j.apenergy.2017.03.051 for
    applications.

    Available techniques:
    - Clustering timeseries into a selected number of 'representative' days:
        days with similar profiles and daily magnitude are grouped and
        represented by one 'representative' day with a greater weight per
        time step.
    - Masking timeseries, leading to variable timestep length:
        only certain parts of the input are kept at full resolution, with
        the remaining periods clustered into single timesteps (e.g. keep
        high resolution in the week with greatest wind power variability,
        smooth all other timesteps to 12H).
    - Timestep resampling:
        reduce problem size by lowering the resolution of all timeseries
        data (e.g. resample from 1H to 6H timesteps).

    Parameters
    ----------
    model_data : xarray Dataset
        Preprocessed Calliope model_data, as produced using
        `calliope.core.preprocess_data.build_model_data`
        and found in model._model_data_original
    model_run : bool
        preprocessed model_run dictionary, as produced by
        Calliope.core.preprocess_model

    Returns
    -------
    data : xarray Dataset
        Dataset with optimisation parameters as variables, optimisation sets
        as coordinates, and other information in attributes. Time dimension
        updated as per the user-defined techniques in model_run.

    """
    time_config = model_run.model['time']

    data = model_data.copy(deep=True)

    # Temporarily record the per-day resolution pattern as an attribute;
    # clustering requires every day to share the same pattern.
    days = np.unique(data.timesteps.to_index().strftime('%Y-%m-%d'))
    daily_timesteps = [data.timestep_resolution.loc[d].values for d in days]
    if not np.all(daily_timesteps == daily_timesteps[0]):
        raise exceptions.ModelError(
            'For clustering, timestep resolution must be uniform.')
    data.attrs['_daily_timesteps'] = daily_timesteps[0]

    ##
    # Masking: collect the timesteps that must stay at high resolution
    ##
    timesteps = None
    if 'masks' in time_config:
        masks = {}
        # time.masks is a list of {'function': .., 'options': ..} dicts
        for mask_spec in time_config.masks:
            mask_spec = AttrDict(mask_spec)
            mask_func = plugin_load(mask_spec.function,
                                    builtin_module='calliope.core.time.masks')
            mask_kwargs = mask_spec.get_key('options', default={})
            masks[mask_spec.to_yaml()] = mask_func(data, **mask_kwargs)
        data.attrs['masks'] = masks
        # Dummy Series let pandas concatenate the DatetimeIndexes
        chosen_timesteps = pd.concat(
            [pd.Series(0, index=m) for m in masks.values()]).index
        # timesteps: everything NOT picked by any mask
        timesteps = pd.Index(
            data.timesteps.values).difference(chosen_timesteps)

    ##
    # Resolution adjustment: apply the configured time function, if any
    ##
    if 'function' in time_config:
        func = plugin_load(time_config.function,
                           builtin_module='calliope.core.time.funcs')
        func_kwargs = time_config.get('function_options', {})
        data = func(data=data, timesteps=timesteps, **func_kwargs)

    # The temporary per-day resolution attribute is no longer needed
    data.attrs.pop('_daily_timesteps', None)

    return data