def test_hierarchical():

    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples',
                                   'testdata.csv'),
                      index_col=0)

    results = pd.read_csv(os.path.join(os.path.dirname(__file__), '..',
                                       'examples', 'results',
                                       'testperiods_hierarchical.csv'),
                          index_col=[0, 1])

    starttime = time.time()

    aggregation = tsam.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24,
        clusterMethod='hierarchical',
        extremePeriodMethod='new_cluster_center',
        addPeakMin=['T'],
        addPeakMax=['Load'])

    typPeriods = aggregation.createTypicalPeriods()

    print('Clustering took ' + str(time.time() - starttime))

    np.testing.assert_array_almost_equal(typPeriods.values,
                                         results.values,
                                         decimal=4)
Example #2
def test_hierarchical():

    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples',
                                   'testdata.csv'),
                      index_col=0)

    orig_raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..',
                                        'examples', 'results',
                                        'testperiods_hierarchical.csv'),
                           index_col=[0, 1])

    starttime = time.time()

    aggregation = tsam.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24,
        clusterMethod='hierarchical',
        extremePeriodMethod='new_cluster_center',
        addPeakMin=['T'],
        addPeakMax=['Load'])

    typPeriods = aggregation.createTypicalPeriods()

    print('Clustering took ' + str(time.time() - starttime))


    # sort the typical days in order to avoid assertion errors due to a different order
    sortedDaysOrig = orig_raw.sum(axis=0, level=0).sort_values('GHI').index
    sortedDaysTest = typPeriods.sum(axis=0, level=0).sort_values('GHI').index

    # rearrange their order
    orig = orig_raw[typPeriods.columns].unstack().loc[sortedDaysOrig, :].stack()
    test = typPeriods.unstack().loc[sortedDaysTest, :].stack()

    np.testing.assert_array_almost_equal(orig.values, test.values, decimal=4)
Example #3
def test_samemean():

    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples',
                                   'testdata.csv'),
                      index_col=0)
    # get all columns as floats to avoid warning
    for col in raw.columns:
        raw[col] = raw[col].astype(float)

    starttime = time.time()

    aggregation = tsam.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24,
        clusterMethod='k_means',
        sameMean=True,
    )

    typPeriods = aggregation.createTypicalPeriods()
    print('Clustering took ' + str(time.time() - starttime))

    # test if the normalized time series all have the same mean
    means = aggregation.normalizedTimeSeries.mean().values
    np.testing.assert_allclose(means,
                               np.array([means[0]] * len(means)),
                               rtol=1e-5)

    # repredict the original data
    rearangedData = aggregation.predictOriginalData()

    # test that the mean matches the mean of the raw time series;
    # this should always hold for k-means, independent of sameMean being True or False
    np.testing.assert_array_almost_equal(raw.mean(),
                                         rearangedData[raw.columns].mean(),
                                         decimal=4)
Example #4
def test_k_medoids():

    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples',
                                   'testdata.csv'),
                      index_col=0)

    results = pd.read_csv(os.path.join(os.path.dirname(__file__), '..',
                                       'examples', 'results',
                                       'testperiods_kmedoids.csv'),
                          index_col=[0, 1])

    starttime = time.time()

    aggregation = tsam.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24 * 7,
        clusterMethod='k_medoids',
    )

    typPeriods = aggregation.createTypicalPeriods()

    print('Clustering took ' + str(time.time() - starttime))

    sortedTypPeriods = typPeriods.reindex(
        typPeriods['GHI'].unstack().sum(axis=1).sort_values().index, level=0)

    np.testing.assert_array_almost_equal(sortedTypPeriods.values,
                                         results.values,
                                         decimal=4)
Example #5
    def discretize_tsam(
        self,
        resolution=None,
        noTypicalPeriods=10,
        hoursPerPeriod=24,
        clusterMethod="hierarchical",
        evalSumPeriods=False,
        sortValues=False,
        sameMean=False,
        rescaleClusterPeriods=True,
        weightDict=None,
        extremePeriodMethod="None",
        solver="glpk",
        roundOutput=None,
        addPeakMin=None,
        addPeakMax=None,
        addMeanMin=None,
        addMeanMax=None,
    ):
        """uses tsam"""
        try:
            import tsam.timeseriesaggregation as tsam
        except ImportError:
            raise ImportError("tsam is required for discretize_tsam()")
        if not isinstance(self.index, DatetimeIndex):
            raise TypeError("To use tsam, index of series must be a "
                            "DateTimeIndex")
        timeSeries = self.copy()
        agg = tsam.TimeSeriesAggregation(
            timeSeries,
            resolution=resolution,
            noTypicalPeriods=noTypicalPeriods,
            hoursPerPeriod=hoursPerPeriod,
            clusterMethod=clusterMethod,
            evalSumPeriods=evalSumPeriods,
            sortValues=sortValues,
            sameMean=sameMean,
            rescaleClusterPeriods=rescaleClusterPeriods,
            weightDict=weightDict,
            extremePeriodMethod=extremePeriodMethod,
            solver=solver,
            roundOutput=roundOutput,
            addPeakMin=addPeakMin,
            addPeakMax=addPeakMax,
            addMeanMin=addMeanMin,
            addMeanMax=addMeanMax,
        )

        agg.createTypicalPeriods()
        results = agg.predictOriginalData()
        results = EnergyDataFrame(results)
        results.__dict__["agg"] = agg
        return results.__finalize__(self)
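# Hedged usage sketch (not from the source): assuming EnergyDataFrame wraps a
# pandas DataFrame with a DatetimeIndex, the column name, index range and random
# data below are illustrative placeholders only.
import numpy as np
import pandas as pd

idx = pd.date_range("2019-01-01", periods=8760, freq="H")
edf = EnergyDataFrame({"load": np.random.rand(8760)}, index=idx)

reduced = edf.discretize_tsam(noTypicalPeriods=10, hoursPerPeriod=24)
print(reduced.head())  # data re-predicted from the typical periods at the original resolution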
Example #6
def apply_time_segmentation(n, segments):
    logger.info(f"Aggregating time series to {segments} segments.")
    try:
        import tsam.timeseriesaggregation as tsam
    except ImportError:
        raise ModuleNotFoundError("Optional dependency 'tsam' not found. "
                                  "Install via 'pip install tsam'")

    p_max_pu_norm = n.generators_t.p_max_pu.max()
    p_max_pu = n.generators_t.p_max_pu / p_max_pu_norm

    load_norm = n.loads_t.p_set.max()
    load = n.loads_t.p_set / load_norm

    inflow_norm = n.storage_units_t.inflow.max()
    inflow = n.storage_units_t.inflow / inflow_norm

    raw = pd.concat([p_max_pu, load, inflow], axis=1, sort=False)

    solver_name = snakemake.config["solving"]["solver"]["name"]

    agg = tsam.TimeSeriesAggregation(
        raw,
        hoursPerPeriod=len(raw),
        noTypicalPeriods=1,
        noSegments=int(segments),
        segmentation=True,
        solver=solver_name,
    )

    segmented = agg.createTypicalPeriods()

    weightings = segmented.index.get_level_values("Segment Duration")
    offsets = np.insert(np.cumsum(weightings[:-1]), 0, 0)
    snapshots = [
        n.snapshots[0] + pd.Timedelta(f"{offset}h") for offset in offsets
    ]

    n.set_snapshots(pd.DatetimeIndex(snapshots, name="name"))
    n.snapshot_weightings = pd.Series(weightings,
                                      index=snapshots,
                                      name="weightings",
                                      dtype="float64")

    segmented.index = snapshots
    n.generators_t.p_max_pu = segmented[
        n.generators_t.p_max_pu.columns] * p_max_pu_norm
    n.loads_t.p_set = segmented[n.loads_t.p_set.columns] * load_norm
    n.storage_units_t.inflow = segmented[
        n.storage_units_t.inflow.columns] * inflow_norm

    return n
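# Hedged usage sketch (not from the source): the function expects a PyPSA network
# and reads the solver name from a global `snakemake` object, so outside a
# Snakemake workflow that global has to exist or the lookup has to be patched.
# The network file name and segment count are placeholders.
import pypsa

n = pypsa.Network("elec.nc")
n = apply_time_segmentation(n, segments=200)
print(n.snapshot_weightings.head())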
Example #7
File: snapshot.py  Project: Svosw/eTraGo
def tsam_cluster(timeseries_df, typical_periods=10, how='daily'):
    """
    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with timeseries to cluster

    Returns
    -------
    timeseries : pd.DataFrame
        Clustered timeseries
    """

    if how == 'daily':
        hours = 24
    elif how == 'weekly':
        hours = 168

    aggregation = tsam.TimeSeriesAggregation(
        timeseries_df,
        noTypicalPeriods=typical_periods,
        rescaleClusterPeriods=False,
        hoursPerPeriod=hours,
        clusterMethod='hierarchical')

    timeseries = aggregation.createTypicalPeriods()
    cluster_weights = aggregation.clusterPeriodNoOccur

    # get the medoids/ the clusterCenterIndices
    clusterCenterIndices = aggregation.clusterCenterIndices

    # get all index for every hour of that day of the clusterCenterIndices
    start = []
    # get the first hour of the clusterCenterIndices (days start with 0)
    for i in clusterCenterIndices:
        start.append(i * hours)

    # get a list with all hours belonging to the clusterCenterIndices
    nrhours = []
    for j in start:
        nrhours.append(j)
        x = 1
        while x < hours:
            j = j + 1
            nrhours.append(j)
            x = x + 1

    # get the original DatetimeIndex
    dates = timeseries_df.iloc[nrhours].index

    return timeseries, cluster_weights, dates, hours
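# Hedged usage sketch (not from the source): one year of hourly data is clustered
# into 10 typical days; the input columns and random values are placeholders.
import numpy as np
import pandas as pd

idx = pd.date_range("2019-01-01", periods=8760, freq="H")
demo = pd.DataFrame({"load": np.random.rand(8760),
                     "wind": np.random.rand(8760)}, index=idx)

ts, weights, dates, hours = tsam_cluster(demo, typical_periods=10, how='daily')
print(weights)          # {cluster number: number of occurrences}
print(dates[:hours])    # hourly timestamps of the first representative day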
Example #8
def test_preprocess():

    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples',
                                   'testdata.csv'),
                      index_col=0)

    raw_wind = raw.loc[:, 'Wind'].to_frame()

    aggregation_wind = tsam.TimeSeriesAggregation(raw_wind,
                                                  noTypicalPeriods=8,
                                                  hoursPerPeriod=24,
                                                  clusterMethod='hierarchical')

    aggregation_wind._preProcessTimeSeries()

    test = aggregation_wind.normalizedPeriodlyProfiles

    orig = pd.read_csv(os.path.join(os.path.dirname(__file__), '..',
                                    'examples', 'results',
                                    'preprocessed_wind.csv'),
                       index_col=[0],
                       header=[0, 1])

    np.testing.assert_array_almost_equal(test.values, orig.values, decimal=15)
Example #9
system = System(lat, lon, model_name=modelname)
wind = Wind("Onshore turbine", lat=lat, lon=lon, installed=1, use_dowa=False)
pv = PhotoVoltaic("PV Full south", installed=1)

system.add_components([wind, pv])
system.fetch_input_data()
system.calculate_time_series()

source = "PVGIS-SARAH ERA-Interim 2005-2016 TMY"
#%% aggregation
raw = pd.DataFrame([wind.state.power, pv.state.power], index=["Wind", "PV"]).transpose()

aggregation = tsam.TimeSeriesAggregation(
    raw,
    noTypicalPeriods=8,
    hoursPerPeriod=24,
    clusterMethod="hierarchical",
    solver="gurobi",
)

typPeriods_c = aggregation.createTypicalPeriods()
accuracy = aggregation.accuracyIndicators()

low_to_high = typPeriods_c.groupby(level=0).mean().sum(axis=1).sort_values().index
low_to_high_map = {k: i for i, k in enumerate(low_to_high)}
low_to_high_get = lambda x: low_to_high_map.get(x)

raw["cluster"] = raw.apply(
    lambda row: low_to_high_get(aggregation.clusterOrder[row.name.dayofyear - 1]), axis=1
)
Example #10
import tsam.timeseriesaggregation as tsam
import pandas as pd

raw = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)

aggregation = tsam.TimeSeriesAggregation(raw,
                                         noTypicalPeriods=8,
                                         hoursPerPeriod=24,
                                         clusterMethod='hierarchical')
df = aggregation.createTypicalPeriods()
weights = aggregation.clusterPeriodNoOccur
clusterOrder = aggregation.clusterOrder

timesteps = [i for i in range(0, len(df.index.get_level_values('TimeStep')))]

print(aggregation.clusterCenterIndices)

# collect the day-of-year value of every hour that falls on a cluster-center day
days = [
    d for d in raw.index.dayofyear if d in aggregation.clusterCenterIndices
]

# select the dates based on this
dates = raw.iloc[days].index

# TODO: Check index start (1 in the DataFrame, 0 in numpy) -> should fit; otherwise
# days are offset by one day
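# Hedged sketch addressing the TODO above (not from the source): if the CSV index
# parses to hourly timestamps, each cluster-center day (0-based, as returned by
# tsam) can be expanded to its 24 hours by integer position, avoiding the
# dayofyear detour and the off-by-one question.
raw_dt = pd.read_csv('testdata.csv', index_col=0, parse_dates=True)

center_days = aggregation.clusterCenterIndices          # 0-based day numbers
center_hours = [d * 24 + h for d in center_days for h in range(24)]
center_dates = raw_dt.index[center_hours]
print(center_dates[::24])   # first hour of every representative day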
Example #11
def test_cluster_order():

    raw = pd.read_csv(os.path.join(os.path.dirname(__file__), '..', 'examples',
                                   'testdata.csv'),
                      index_col=0)

    raw_wind = raw.loc[:, 'Wind'].to_frame()

    orig_raw_predefClusterOrder = pd.read_csv(os.path.join(
        os.path.dirname(__file__), '..', 'examples', 'results',
        'testperiods_predefClusterOrder.csv'),
                                              index_col=[0, 1])

    orig_raw_predefClusterOrderAndClusterCenters = pd.read_csv(
        os.path.join(os.path.dirname(__file__), '..', 'examples', 'results',
                     'testperiods_predefClusterOrderAndClusterCenters.csv'),
        index_col=[0, 1])

    starttime = time.time()

    aggregation_wind = tsam.TimeSeriesAggregation(raw_wind,
                                                  noTypicalPeriods=8,
                                                  hoursPerPeriod=24,
                                                  clusterMethod='hierarchical')

    typPeriods_wind = aggregation_wind.createTypicalPeriods()

    aggregation_predefClusterOrder = tsam.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24,
        clusterMethod='hierarchical',
        predefClusterOrder=aggregation_wind.clusterOrder)

    typPeriods_predefClusterOrder = \
        aggregation_predefClusterOrder.createTypicalPeriods()

    aggregation_predefClusterOrderAndClusterCenters = tsam.TimeSeriesAggregation(
        raw,
        noTypicalPeriods=8,
        hoursPerPeriod=24,
        clusterMethod='hierarchical',
        predefClusterOrder=aggregation_wind.clusterOrder,
        predefClusterCenterIndices=aggregation_wind.clusterCenterIndices)

    typPeriods_predefClusterOrderAndClusterCenters = \
        aggregation_predefClusterOrderAndClusterCenters.createTypicalPeriods()

    print('Clustering took ' + str(time.time() - starttime))

    # sort the typical days in order to avoid assertion errors due to a different order
    sortedDaysOrig1 = orig_raw_predefClusterOrder.sum(
        axis=0, level=0).sort_values('GHI').index
    sortedDaysTest1 = typPeriods_predefClusterOrder.sum(
        axis=0, level=0).sort_values('GHI').index

    sortedDaysOrig2 = orig_raw_predefClusterOrderAndClusterCenters.sum(
        axis=0, level=0).sort_values('GHI').index
    sortedDaysTest2 = typPeriods_predefClusterOrderAndClusterCenters.sum(
        axis=0, level=0).sort_values('GHI').index

    # rearrange their order
    orig1 = orig_raw_predefClusterOrder[
        typPeriods_predefClusterOrder.columns].unstack().loc[
            sortedDaysOrig1, :].stack()
    test1 = typPeriods_predefClusterOrder.unstack().loc[
        sortedDaysTest1, :].stack()
    orig2 = orig_raw_predefClusterOrderAndClusterCenters[
        typPeriods_predefClusterOrderAndClusterCenters.columns].unstack().loc[
            sortedDaysOrig2, :].stack()
    test2 = typPeriods_predefClusterOrderAndClusterCenters.unstack().loc[
        sortedDaysTest2, :].stack()

    np.testing.assert_array_almost_equal(orig1.values,
                                         test1[orig1.columns].values,
                                         decimal=4)
    np.testing.assert_array_almost_equal(orig2.values,
                                         test2[orig2.columns].values,
                                         decimal=4)
Example #12
def tsam_cluster(timeseries_df,
                 typical_periods=10,
                 how='daily',
                 extremePeriodMethod='None'):
    """
    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with timeseries to cluster
    extremePeriodMethod: {'None','append','new_cluster_center',
                           'replace_cluster_center'}, default: 'None'
        Method how to integrate extreme Periods
        into to the typical period profiles.
        None: No integration at all.
        'append': append typical Periods to cluster centers
        'new_cluster_center': add the extreme period as additional cluster
            center. It is checked then for all Periods if they fit better
            to the this new center or their original cluster center.
        'replace_cluster_center': replaces the cluster center of the
            cluster where the extreme period belongs to with the periodly
            profile of the extreme period. (Worst case system design)

    Returns
    -------
    timeseries : pd.DataFrame
        Clustered timeseries
    """

    if how == 'daily':
        hours = 24
        period = ' days'
    elif how == 'weekly':
        hours = 168
        period = ' weeks'

    print('Snapshot clustering to ' + str(typical_periods) + period + 
          ' using extreme period method: ' + extremePeriodMethod)

    aggregation = tsam.TimeSeriesAggregation(
        timeseries_df,
        noTypicalPeriods=typical_periods,
        extremePeriodMethod=extremePeriodMethod,
        addPeakMin=['residual_load'],
        addPeakMax=['residual_load'],
        rescaleClusterPeriods=False,
        hoursPerPeriod=hours,
        clusterMethod='hierarchical')


    timeseries = aggregation.createTypicalPeriods()
    cluster_weights = aggregation.clusterPeriodNoOccur
    clusterOrder = aggregation.clusterOrder
    clusterCenterIndices = aggregation.clusterCenterIndices

    if extremePeriodMethod == 'new_cluster_center':
        for i in aggregation.extremePeriods.keys():
            clusterCenterIndices.insert(
                aggregation.extremePeriods[i]['newClusterNo'],
                aggregation.extremePeriods[i]['stepNo'])

    if extremePeriodMethod == 'append':
        for i in aggregation.extremePeriods.keys():
            clusterCenterIndices.insert(
                aggregation.extremePeriods[i]['clusterNo'],
                aggregation.extremePeriods[i]['stepNo'])

    # get all index for every hour of that day of the clusterCenterIndices
    start = []
    # get the first hour of the clusterCenterIndices (days start with 0)
    for i in clusterCenterIndices:
        start.append(i * hours)

    # get a list with all hours belonging to the clusterCenterIndices
    nrhours = []
    for j in start:
        nrhours.append(j)
        x = 1
        while x < hours:
            j = j + 1
            nrhours.append(j)
            x = x + 1

    # get the original DatetimeIndex
    dates = timeseries_df.iloc[nrhours].index

    # get list of representative days
    representative_day = []

    # cluster: medoid of the respective cluster
    dic_clusterCenterIndices = dict(enumerate(clusterCenterIndices))
    for i in clusterOrder:
        representative_day.append(dic_clusterCenterIndices[i])

    # get list of the last hour of each representative day
    last_hour_datetime = []
    for i in representative_day:
        last_hour = i * hours + hours - 1
        last_hour_datetime.append(timeseries_df.index[last_hour])

    # create a dataframe (index = number of the day in the year / candidate)
    df_cluster = pd.DataFrame({
        'Cluster': clusterOrder,  # cluster of the day
        'RepresentativeDay': representative_day,  # representative day of the cluster
        'last_hour_RepresentativeDay': last_hour_datetime})  # last hour of the cluster
    df_cluster.index = df_cluster.index + 1
    df_cluster.index.name = 'Candidate'

    # create a dataframe mapping each time step (h) to its candidate day (i): df_i_h
    nr_day = []
    x = len(timeseries_df.index) / hours + 1

    for i in range(1, int(x)):
        j = 1
        while j <= hours:
            nr_day.append(i)
            j = j + 1
    df_i_h = pd.DataFrame({'Timeseries': timeseries_df.index,
                           'Candidate_day': nr_day})
    df_i_h.set_index('Timeseries', inplace=True)

    return df_cluster, cluster_weights, dates, hours, df_i_h
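# Hedged usage sketch (not from the source): the column 'residual_load' has to be
# present because the function hard-codes it in addPeakMin/addPeakMax; the index
# range and random values are placeholders.
import numpy as np
import pandas as pd

idx = pd.date_range("2019-01-01", periods=8760, freq="H")
demo = pd.DataFrame({"residual_load": np.random.rand(8760)}, index=idx)

df_cluster, weights, dates, hours, df_i_h = tsam_cluster(
    demo, typical_periods=12, how='daily',
    extremePeriodMethod='new_cluster_center')
print(df_cluster.head())    # cluster and representative day per candidate day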
Example #13
def tsam_cluster(timeseries_df, typical_periods=10, how='daily'):
    """
    Parameters
    ----------
    df : pd.DataFrame
        DataFrame with timeseries to cluster

    Returns
    -------
    timeseries : pd.DataFrame
        Clustered timeseries
    """

    if how == 'daily':
        hours = 24
    elif how == 'weekly':
        hours = 168
    '''
    ###########################################################    
    #URI: This adds peaks
    
    #URI: We find the day with more peaks and pick one of the LOADS with such.
    #Get indices of the maximums
    loading = timeseries_df.filter(regex='0$|1$|2$|3$|4$|5$|6$|7$|8$|9$',axis=1)
    loading_max = loading.idxmax()
    #Get rid of the hours
    #loading_aux = loading.apply(lambda x: str(x.year) + "-" + str(x.month) + "-" + str(x.day))
    loading_aux=loading_max.apply(lambda x: x.strftime("%y-%m-%d"))
    #find the day that is the peak for most of the loads
    #Notice: we just pick the first date; there could be other days with the same maximums
    #This comes from: https://stackoverflow.com/questions/6987285/python-find-the-item-with-maximum-occurrences-in-a-list
    from collections import defaultdict
    from operator import itemgetter
    c = defaultdict(int)
    for i in loading_aux:
        c[i] += 1
    peakloadday=max(c.items(), key=itemgetter(1))
    for idx, val in enumerate(loading_aux):
        if val == peakloadday[0]:
            peakloadcol = loading_aux.index.values[idx]
            break
    
    
    #URI: We find the day with more peaks and pick one of the WIND generators with such.
    #Get indices of the maximums
    wind = timeseries_df.filter(regex='wind$', axis=1)
    wind_max = wind.idxmax()
    #Get rid of the hours
    #loading_aux = loading.apply(lambda x: str(x.year) + "-" + str(x.month) + "-" + str(x.day))
    wind_aux=wind_max.apply(lambda x: x.strftime("%y-%m-%d"))
    #find the day that is the peak for most of the wind generators
    #Notice: we just pick the first date; there could be other days with the same maximums
    #This comes from: https://stackoverflow.com/questions/6987285/python-find-the-item-with-maximum-occurrences-in-a-list
    from collections import defaultdict
    from operator import itemgetter
    c = defaultdict(int)
    for i in wind_aux:
        c[i] += 1
    peakwindday=max(c.items(), key=itemgetter(1))
    for idx, val in enumerate(wind_aux):
        if val == peakwindday[0]:
            peakwindcol = wind_aux.index.values[idx]
            break
    
    
    
    #URI: We find the day with more peaks and pick one of the SOLAR generators with such.
    #Get indices of the maximums
    solar = timeseries_df.filter(regex='solar$', axis=1)
    solar_max = solar.idxmax()
    #Get rid of the hours
    #loading_aux = loading.apply(lambda x: str(x.year) + "-" + str(x.month) + "-" + str(x.day))
    solar_aux=solar_max.apply(lambda x: x.strftime("%y-%m-%d"))
    #find the day that is the peak for most of the solar generators
    #Notice: we just pick the first date; there could be other days with the same maximums
    #This comes from: https://stackoverflow.com/questions/6987285/python-find-the-item-with-maximum-occurrences-in-a-list
    from collections import defaultdict
    from operator import itemgetter
    c = defaultdict(int)
    for i in solar_aux:
        c[i] += 1
    peaksolarday=max(c.items(), key=itemgetter(1))
    for idx, val in enumerate(solar_aux):
        if val == peaksolarday[0]:
            #This is not the column with the overall maximum, just the first column with the maximum at the place
            #in time with more maximums
            peaksolarcol = solar_aux.index.values[idx]
            break
            
    aggregation = tsam.TimeSeriesAggregation(
    timeseries_df,
    noTypicalPeriods=typical_periods,
    rescaleClusterPeriods=False,
    hoursPerPeriod=hours,
    clusterMethod='hierarchical', 
    extremePeriodMethod = 'append', #'None', 'append', 'new_cluster_center', 'replace_cluster_center'
    addPeakMax = [peakloadcol,peakwindcol,peaksolarcol])
    '''

    ##################################################################
    #URI: This is the original method=hierarchical
    aggregation = tsam.TimeSeriesAggregation(
        timeseries_df,
        noTypicalPeriods=typical_periods,
        rescaleClusterPeriods=False,
        hoursPerPeriod=hours,
        clusterMethod='hierarchical'
    )  #averaging, k_means, k_medoids, hierarchical

    timeseries = aggregation.createTypicalPeriods()
    #URI: Better take the whole thing
    timeseries_new = aggregation.predictOriginalData()
    cluster_weights = aggregation.clusterPeriodNoOccur

    # get the medoids/ the clusterCenterIndices
    clusterCenterIndices = aggregation.clusterCenterIndices

    #URI: and add the peak periods
    #clusterOrder = aggregation.clusterOrder
    #for i in range(len(clusterCenterIndices),len(cluster_weights.keys())):
    #    clusterCenterIndices.append(np.where(clusterOrder == i)[0][0])

    # get all index for every hour of that day of the clusterCenterIndices
    start = []
    # get the first hour of the clusterCenterIndices (days start with 0)
    for i in clusterCenterIndices:
        start.append(i * hours)

    # get a list with all hours belonging to the clusterCenterIndices
    nrhours = []
    for j in start:
        nrhours.append(j)
        x = 1
        while x < hours:
            j = j + 1
            nrhours.append(j)
            x = x + 1

    # get the original DatetimeIndex
    dates = timeseries_df.iloc[nrhours].index
    '''

    ######################################################
    #URI: This is the method=k_means
    aggregation = tsam.TimeSeriesAggregation(
    timeseries_df,
    noTypicalPeriods=typical_periods,
    rescaleClusterPeriods=False,
    hoursPerPeriod=hours,
    clusterMethod='k_means') #averaging, k_means, k_medoids, hierarchical

    timeseries = aggregation.createTypicalPeriods()
    #URI: Better take the whole thing
    timeseries_new =aggregation.predictOriginalData()
    cluster_weights = aggregation.clusterPeriodNoOccur

    
    # get a representative as clusterCenterIndices
    #clusterCenterIndices = aggregation.clusterCenterIndices
    clusterCenterIndices = []
    for i in cluster_weights.keys():
        clusterCenterIndices.append(np.argmax(aggregation.clusterOrder == i))
    
    #URI: and add the peak periods
    #clusterOrder = aggregation.clusterOrder
    #for i in range(len(clusterCenterIndices),len(cluster_weights.keys())):
    #    clusterCenterIndices.append(np.where(clusterOrder == i)[0][0]) 
    
    # get all index for every hour of that day of the clusterCenterIndices
    start = []
    # get the first hour of the clusterCenterIndices (days start with 0)
    for i in clusterCenterIndices:
        start.append(i * hours)

    # get a list with all hours belonging to the clusterCenterIndices
    nrhours = []
    for j in start:
        nrhours.append(j)
        x = 1
        while x < hours:
            j = j + 1
            nrhours.append(j)
            x = x + 1

    # get the original DatetimeIndex
    dates = timeseries_df.iloc[nrhours].index
    '''

    ##########################################################
    #URI: This is to get the data as csv
    directory = '/home/raventos/Results/tsamdata/'
    timeseries_df.to_csv(directory + 'rawPeriods' + str(typical_periods) +
                         '.csv')
    timeseries.to_csv(directory + 'typPeriods' + str(typical_periods) + '.csv')
    timeseries_new.to_csv(directory + 'predictedPeriods' +
                          str(typical_periods) + '.csv')
    aggregation.indexMatching().to_csv(directory + 'indexMatching' +
                                       str(typical_periods) + '.csv')
    np.savetxt(directory + 'clusterOrder' + str(typical_periods) + '.csv',
               aggregation.clusterOrder.astype(int),
               fmt='%i',
               delimiter=",")
    cCI = pd.DataFrame(clusterCenterIndices, columns=['index'])
    cCI.to_csv(directory + 'clusterCenterIndices' + str(typical_periods) +
               '.csv')
    noOccur = pd.DataFrame(cluster_weights, index=['noOccur'])
    noOccur.to_csv(directory + 'noOccurrances' + str(typical_periods) + '.csv')
    aggregation.accuracyIndicators().to_csv(directory + 'indicators' +
                                            str(typical_periods) + '.csv')
    np.savetxt(directory + 'dates' + str(typical_periods) + '.csv',
               dates.strftime("%Y-%m-%d %X"),
               fmt="%s",
               delimiter=",")

    #URI: Just checking
    #print([peakloadcol,peakwindcol,peaksolarcol])
    #print(clusterCenterIndices)
    #print([peakloadday,peakwindday,peaksolarday])
    #print(dates)
    #print(cluster_weights)
    #print(aggregation.indexMatching())
    return timeseries_new, cluster_weights, dates, hours
Example #14
def temporal_clustering(datapackage, n, path="/tmp", how="daily"):
    """ Creates a new datapackage by aggregating sequences inside the
    `sequence` folder of the specified datapackage by clustering `n` timesteps

    Parameters
    ----------
    datapackage: string
        String of meta data file datapackage.json
    n: integer
        Number of clusters
    path: string
        Path to directory where the aggregated datapackage is stored
    how: string
        How to cluster 'daily' or 'hourly'
    """
    if how == "weekly":
        raise NotImplementedError("Weekly clustering is not implemented!")

    p = Package(datapackage)

    cwd = os.getcwd()

    copied_package_name = (p.descriptor["name"] + "__temporal_cluster__" +
                           how + "_" + str(n))

    copy_path = os.path.join(path, p.descriptor["name"], copied_package_name)

    copied_root = copy_datapackage(datapackage,
                                   os.path.abspath(copy_path),
                                   subset="data")

    sequence_resources = [
        r for r in p.resources
        if re.match(r"^data/sequences/.*$", r.descriptor["path"])
    ]

    dfs = {
        r.name:
        pd.DataFrame(r.read(keyed="True")).set_index("timeindex").astype(float)
        for r in sequence_resources
    }
    sequences = pd.concat(dfs.values(), axis=1)

    if how == "daily":
        hoursPerPeriod = 24
    elif how == "hourly":
        hoursPerPeriod = 1
    elif how == "weekly":
        hoursPerPeriod = 24 * 7

    aggregation = tsam.TimeSeriesAggregation(
        sequences,
        noTypicalPeriods=n,
        rescaleClusterPeriods=False,
        hoursPerPeriod=hoursPerPeriod,
        clusterMethod="hierarchical",
    )

    cluster_weights = {
        aggregation.clusterCenterIndices[k]: w
        for k, w in aggregation.clusterPeriodNoOccur.items()
    }
    if how == "daily":
        temporal = pd.Series(
            {
                d: cluster_weights[d.dayofyear]
                for d in sequences.index
                if d.dayofyear in aggregation.clusterCenterIndices
            },
            name="weighting",
        )
        temporal.index.name = "timeindex"

    elif how == "hourly":
        temporal = pd.Series(
            {
                h: cluster_weights[sequences.index.get_loc(h)]
                for h in sequences.index if sequences.index.get_loc(h) in
                aggregation.clusterCenterIndices
            },
            name="weighting",
        )
        temporal.index.name = "timeindex"

    # write resources to the copied package (this should not interfere with the
    # meta data, as columns are not removed or sorted when written).
    os.chdir(copied_root)
    for r in sequence_resources:
        write_sequences(r.name + ".csv",
                        dfs[r.name].loc[temporal.index],
                        replace=True)

    # write temporal information from clustering
    temporal.to_csv(
        "data/temporal.csv",
        header=True,
        sep=";",
        date_format="%Y-%m-%dT%H:%M:%SZ",
    )
    # add meta data for new temporal information
    r = Resource({"path": "data/temporal.csv"})
    r.infer()
    # TODO: Add meta-data description
    r.descriptor[
        "description"] = "Temporal selection based on hierarchical clustering..."

    # Update meta-data of copied package
    cp = Package("datapackage.json")
    cp.descriptor["name"] = copied_package_name
    cp.descriptor["resources"].append(r.descriptor)
    cp.commit()
    cp.save("datapackage.json")

    # set back to 'old' workdirectory
    os.chdir(cwd)

    return copied_root
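# Hedged usage sketch (not from the source): the datapackage path is a placeholder
# and the helpers copy_datapackage / write_sequences are assumed to come from the
# surrounding module.
copied = temporal_clustering("datapackage.json", n=5, path="/tmp", how="daily")
print(copied)   # root directory of the aggregated copy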