Пример #1
0
 def _update_centroids(self, X):
     """Refresh each cluster centre from the series currently labelled with it.

     For every cluster index ``k`` the rows ``X[self.labels_ == k]`` are
     handed to the barycenter routine selected by ``self.metric``:
     ``dtw_barycenter_averaging`` for ``"dtw"``, ``softdtw_barycenter`` for
     ``"softdtw"`` and ``euclidean_barycenter`` for any other value.  The
     result overwrites ``self.cluster_centers_[k]``.

     A ``"gamma_sdtw"`` entry of ``self.metric_params`` is forwarded under
     the name ``"gamma"``.
     """
     # Rename on a copy so ``self.metric_params`` keeps its original keys.
     params = {} if self.metric_params is None else self.metric_params.copy()
     if "gamma_sdtw" in params:
         params["gamma"] = params.pop("gamma_sdtw")

     for k in range(self.n_clusters):
         members = X[self.labels_ == k]
         if self.metric == "dtw":
             # The previous centre seeds the averaging.
             updated = dtw_barycenter_averaging(
                 X=members,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[k],
                 metric_params=params,
                 verbose=False)
         elif self.metric == "softdtw":
             updated = softdtw_barycenter(
                 X=members,
                 max_iter=self.max_iter_barycenter,
                 init=self.cluster_centers_[k],
                 **params)
         else:
             updated = euclidean_barycenter(X=members)
         self.cluster_centers_[k] = updated
def template_signal(signals):
    """Return one ``dtw_barycenter_averaging`` result per element of *signals*.

    Parameters
    ----------
    signals : iterable
        Each element is passed unchanged as the dataset argument of
        ``dtw_barycenter_averaging``.

    Returns
    -------
    list
        The results, in the same order as *signals*.
    """
    # NOTE(review): ``max_iter=0`` is passed on purpose here; confirm that
    # skipping all refinement iterations is really what is wanted.
    return [
        dtw_barycenter_averaging(participant, max_iter=0)
        for participant in signals
    ]
def plot_som_series_dba_center(som_x, som_y, win_map):
    """Plot every grid cell's series together with their DBA barycenter.

    Parameters
    ----------
    som_x, som_y : int
        Number of rows and columns of the subplot grid.
    win_map : mapping
        Maps ``(x, y)`` grid coordinates to the collection of series
        assigned to that cell.  Cells without an entry get an empty,
        titled subplot.

    The member series are drawn in translucent gray and the result of
    ``dtw_barycenter_averaging`` on their vertical stack in red.  The
    figure is displayed with ``plt.show()``; nothing is returned.
    """
    # squeeze=False keeps ``axs`` two-dimensional even when ``som_x`` or
    # ``som_y`` equals 1, so indexing with the (x, y) tuple always works.
    fig, axs = plt.subplots(som_x, som_y, figsize=(25, 25), squeeze=False)
    fig.suptitle('Clusters')
    for x in range(som_x):
        for y in range(som_y):
            cluster = (x, y)
            ax = axs[cluster]
            if cluster in win_map:
                members = win_map[cluster]
                for series in members:
                    ax.plot(series, c="gray", alpha=0.5)
                ax.plot(dtw_barycenter_averaging(np.vstack(members)),
                        c="red")
            # Cells are numbered row by row, starting at 1.
            cluster_number = x * som_y + y + 1
            ax.set_title(f"Cluster {cluster_number}")

    plt.show()
def barycenter_imputation(missing_set, n_partitions=24, n_individuals=10):
    """Fill NaN entries with the DBA barycenter of the complete entries.

    For each index along the first axis of *missing_set*, positions along
    the second axis are split by whether their value at index 0 of the
    third axis is NaN.  The barycenter of the non-NaN positions is computed
    with ``dtw_barycenter_averaging`` and written into the NaN positions of
    a copy; *missing_set* itself is not modified.

    The filled copy is then passed through
    ``data_utils.compress_maintain_dim`` with *n_partitions* and
    *n_individuals*, and the result is returned reshaped to drop its last
    axis (which therefore has to be of length 1).

    NOTE(review): the exact array layout is not visible here -- confirm the
    expected dimensionality of *missing_set* against the caller.
    """
    filled = missing_set.copy()

    for ind in range(missing_set.shape[0]):
        nan_mask = np.isnan(missing_set[ind, :, 0])
        missing_rows = np.argwhere(nan_mask)[:, 0]
        complete_rows = np.argwhere(~nan_mask)[:, 0]

        barycenter = dtw_barycenter_averaging(
            missing_set[ind, complete_rows, :])
        filled[ind, missing_rows, :] = barycenter

    compressed = data_utils.compress_maintain_dim(
        filled, n_partitions=n_partitions, n_individuals=n_individuals)

    return compressed.reshape(compressed.shape[:-1])
Пример #5
0
def ts_average(x, pid_indices):
    """Average the time series belonging to each PID with DBA.

    Parameters
    ----------
    x : sequence
        ``[[time series 1], ..., [time series n]]``.
    pid_indices : mapping
        ``{pid: [indices into x]}``.

    Returns
    -------
    tuple of (list, list)
        The ``dtw_barycenter_averaging`` result for each PID, and the PID
        labels in the matching order (iteration order of *pid_indices*).
    """
    pids = list(pid_indices)
    averages = [
        dtw_barycenter_averaging([x[j] for j in pid_indices[pid]])
        for pid in pids
    ]
    return averages, pids
Пример #6
0
 def _update_centroids(self, X):
     """Replace each cluster centre by the barycenter of its members.

     The members of cluster ``k`` are ``X[self.labels_ == k]``.  Depending
     on ``self.metric`` they are averaged with ``dtw_barycenter_averaging``
     (``"dtw"``), ``softdtw_barycenter`` (``"softdtw"``) or
     ``euclidean_barycenter`` (anything else), and the result is stored in
     ``self.cluster_centers_[k]``.  Metric options come from
     ``self._get_metric_params()``.
     """
     params = self._get_metric_params()
     for k in range(self.n_clusters):
         members = X[self.labels_ == k]
         if self.metric == "dtw":
             # The current centre is used as the starting barycenter.
             new_center = dtw_barycenter_averaging(
                 X=members,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[k],
                 metric_params=params,
                 verbose=False)
         elif self.metric == "softdtw":
             new_center = softdtw_barycenter(
                 X=members,
                 max_iter=self.max_iter_barycenter,
                 init=self.cluster_centers_[k],
                 **params)
         else:
             new_center = euclidean_barycenter(X=members)
         self.cluster_centers_[k] = new_center
Пример #7
0
 def _update_centroids(self, X):
     """Recompute ``self.cluster_centers_`` from the current labelling.

     Cluster ``k`` is rebuilt from ``X[self.labels_ == k]`` using
     ``dtw_barycenter_averaging`` when ``self.metric`` is ``"dtw"``, a
     fitted ``SoftDTWBarycenter`` when it is ``"softdtw"`` (with
     ``self.max_iter_barycenter`` and ``self.gamma_sdtw``), and a fitted
     ``EuclideanBarycenter`` otherwise.
     """
     for k in range(self.n_clusters):
         members = X[self.labels_ == k]
         if self.metric == "dtw":
             self.cluster_centers_[k] = dtw_barycenter_averaging(
                 X=members,
                 barycenter_size=None,
                 init_barycenter=self.cluster_centers_[k],
                 verbose=False)
         elif self.metric == "softdtw":
             # The previous centre initialises the soft-DTW estimator.
             estimator = SoftDTWBarycenter(
                 max_iter=self.max_iter_barycenter,
                 gamma=self.gamma_sdtw,
                 init=self.cluster_centers_[k])
             self.cluster_centers_[k] = estimator.fit(members)
         else:
             self.cluster_centers_[k] = EuclideanBarycenter().fit(members)
    dtw_barycenter_averaging, softdtw_barycenter
from tslearn.datasets import CachedDatasets

# Seed NumPy's global RNG before any barycenter is computed.
numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
# Only the training series labelled 2 are averaged and plotted.
X = X_train[y_train == 2]

# Compute the barycenters in the order the panels are drawn.
euclidean_bar = euclidean_barycenter(X)
dba_bar = dtw_barycenter_averaging(X, max_iter=100, verbose=False)
sdtw_bar = softdtw_barycenter(X, gamma=1., max_iter=100)

panels = [
    ("Euclidean barycenter", euclidean_bar),
    ("DBA", dba_bar),
    # Raw string: "\g" is not a valid escape sequence in a normal literal.
    (r"Soft-DTW barycenter ($\gamma$=1.)", sdtw_bar),
]

plt.figure()
for position, (panel_title, barycenter) in enumerate(panels, start=1):
    plt.subplot(3, 1, position)
    # Member series in faint black, their barycenter on top in red.
    for ts in X:
        plt.plot(ts.ravel(), "k-", alpha=.2)
    plt.plot(barycenter.ravel(), "r-", linewidth=2)
    plt.title(panel_title)

plt.tight_layout()
plt.show()
Пример #9
0
    # plt.savefig(fig_path+'dbscan_pairplot_full_samples_solo_L2346', dpi=600, transparent=True)  # uncomment to save the figure; the quoted string is the file name

    # Per-(pred, cycle_reset) summary statistics of the remaining columns.
    print(
        df.drop([
            'cycle_time', 'errored_cycle', 'encoded_leech_no',
            'encoded_video_name'
        ],
                axis=1).groupby(['pred', 'cycle_reset']).describe())
    #%%
    # Rows whose prediction is -1 are excluded from the barycenters
    # (presumably the clustering's noise label -- confirm).
    good_mask = (df.pred != -1).values
    dropped_df = df[good_mask]

    dropped_lengths = transposed_lengths[good_mask]
    closest_idx = []
    for cluster in np.sort(dropped_df.pred.unique()):
        # DBA barycenter of all series predicted to be in this cluster.
        bc = barycenters.dtw_barycenter_averaging(
            dropped_lengths[(dropped_df.pred == cluster).values])
        # Index of the entry of ``binned_lengths`` with the smallest summed
        # squared difference to the barycenter.
        closest_idx.append(
            np.argmin(np.power(binned_lengths - bc.T, 2).sum(axis=(1, 2))))
        fig, ax = cvU.plotBinnedLengths(bc.T, zscore_speed=True)
        fig.suptitle('cluster {}'.format(cluster))

    for idx in closest_idx:
        f, a = cvU.plotBinnedLengths(binned_lengths[idx])
        # Title the figure created on the line above; the previous code
        # used ``fig``, i.e. the last barycenter figure of the loop before.
        f.suptitle('idx: {}, cluster: {}'.format(idx, df.pred[idx]))

    # Bare expression: only displays something when run as an interactive
    # cell; it has no effect in a plain script.
    df.drop([
        'cycle_reset', 'errored_cycle', 'encoded_video_name',
        'encoded_leech_no'
    ],
            axis=1).loc[closest_idx]
Пример #10
0
import csv, argparse
import numpy as np
from tslearn.barycenters import dtw_barycenter_averaging

parser = argparse.ArgumentParser()
parser.add_argument("corpus", help="Type of corpus you're working with")
parser.add_argument("language", help="Name of the language you're using")
# ``choices`` makes argparse reject anything else with a usage error; unlike
# an ``assert`` this check is not stripped when Python runs with -O.
parser.add_argument("gram", choices=["unigram", "trigram"],
                    help="Unigram or trigram")
args = parser.parse_args()

## read in surprisals data
surprisals_path = ("../../ValSurprisals/" + args.corpus + '/' + args.gram
                   + '/' + args.language + "_compressed.csv")
# newline='' is what the csv module asks for on files handed to csv.reader.
with open(surprisals_path, 'r', newline='') as f:
    # The first column of every row is dropped and "NA" cells are skipped.
    # Parsing happens once here instead of on every loop iteration below.
    X = [[float(item) for item in row[1:] if item != "NA"]
         for row in csv.reader(f)]

# get barycenter for each size hyperparameter value as list
barycenters_by_size = {}
for BARYCENTER_SIZE in range(1, 15):
    barycenters_by_size[BARYCENTER_SIZE] = dtw_barycenter_averaging(
        X=X,
        barycenter_size=BARYCENTER_SIZE,
        verbose=True).reshape(BARYCENTER_SIZE).tolist()
    print(BARYCENTER_SIZE)
Пример #11
0
# read in surprisals data
# Column 0 of each row is an integer weight, the remaining columns are the
# series values (with "NA" marking cells to skip).
X = []
weights = []
# newline='' is what the csv module asks for on files handed to csv.reader.
with open("ValSurprisals/" + args.corpus + '/' + args.gram + '/' + \
            args.language + "_compressed.csv", 'r', newline='') as f:
    for row in csv.reader(f):
        X.append(row[1:])
        weights.append(int(row[0]))

# get barycenter of info-curves as list
data = [[float(item) for item in series if item != "NA"] for series in X]

barycenter = dtw_barycenter_averaging(
    X=data,
    barycenter_size=BARYCENTER_SIZE,
    weights=np.array(weights)).reshape(BARYCENTER_SIZE).tolist()
# Tag the row with the run's identifying arguments.
barycenter += [args.language, args.corpus, args.gram]

# output barycenter to OUTPUT_FILE (appended as one csv row)
# newline='' stops the text layer from turning the writer's "\r\n" into
# "\r\r\n" on Windows, which would show up as blank rows.
with open(OUTPUT_FILE, 'a', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(barycenter)
# X = []
# with open("ValSurprisals/" + args.corpus + '/' + args.gram + '/' + \
#             args.language + "_compressed.csv", 'r') as f:
#      reader = csv.reader(f)
#      for row in reader:
#          X.append(row)
#
# # get barycenter of info-curves as list
Пример #12
0
from scipy.cluster.hierarchy import linkage, dendrogram

# Load the barycenter table and keep the wikipedia unigram rows only.
barycenters = pd.read_csv("../Data/5barycenters.csv")
is_unigram = barycenters["gram"] == "unigram"
from_wikipedia = barycenters["source"] == "wikipedia"
barycenters = barycenters[is_unigram & from_wikipedia]
# Numerical part of the table: columns '1' through '5'.
centers = barycenters.loc[:, '1':'5'].to_numpy()

# Single-linkage clustering with ``dtw`` as the pairwise metric.  Each row of
# Z holds the two merged cluster ids, their distance, and the size of the
# newly formed cluster.
Z = linkage(centers, method="single", metric=dtw)

# Append one DBA barycenter per merge, so that row ``len(centers) + i`` is
# the centre of the cluster created by merge ``i``.
expanded_centers = centers.copy()
for merge in Z:
    left, right = int(merge[0]), int(merge[1])
    cluster = np.vstack((expanded_centers[left], expanded_centers[right]))
    new_center = dtw_barycenter_averaging(cluster)
    expanded_centers = np.vstack((expanded_centers, new_center.T))

# Save the results.
# TODO: np.savetxt writes scientific notation by default; pick an ``fmt``
# if plain decimals are wanted.
np.savetxt("linkage.txt", Z)
np.savetxt("expanded_centers.txt", expanded_centers)

# One language name per line, in table order.
languages = barycenters["language"].tolist()
with open("languages.txt", 'w') as language_file:
    language_file.writelines("%s\n" % language for language in languages)
Пример #13
0
import csv
import numpy as np
import pandas as pd
from tslearn.barycenters import dtw_barycenter_averaging

## within gram
# newline='' is what the csv module asks for on files handed to csv.reader.
with open("Data/5barycenters.csv", 'r', newline='') as f:
    X = list(csv.reader(f))


def _wikipedia_series(rows, gram):
    """Return the first five cells, as floats, of the matching rows.

    A row matches when its second-to-last cell is "wikipedia" and its last
    cell equals *gram*.  Converting here means the barycenter routine gets
    numbers rather than the raw csv strings.
    """
    return [[float(value) for value in series[:5]]
            for series in rows
            if series[-2] == "wikipedia" and series[-1] == gram]


## split X into unigram and trigram
unigrams = _wikipedia_series(X, "unigram")
trigrams = _wikipedia_series(X, "trigram")

unigram_barycenter = dtw_barycenter_averaging(
    X=unigrams, verbose=True).reshape(5).tolist()
trigram_barycenter = dtw_barycenter_averaging(
    X=trigrams, verbose=True).reshape(5).tolist()

## within
# NOTE: ``X`` is rebound from the list of csv rows to a DataFrame here.
X = pd.read_csv("Data/5barycenters_fam.csv")
Пример #14
0
            genealogy[row[-2]] = []
        genealogy[row[-2]].append(row[-1])
# get unique genii
genealogy = {family: list(set(genii)) for family, genii in genealogy.items()}
barycenters = []

for gram in grams:
    ## create gram mean barycenter
    data = [row[:5] for row in X if row[-3] == gram]
    data = [[float(item) for item in series] for series in data]

    data = [[el - sum(row) / len(row) for el in row] for row in data]

    if data != []:
        barycenter = dtw_barycenter_averaging(
            X=data, barycenter_size=BARYCENTER_SIZE,
            verbose=True).reshape(BARYCENTER_SIZE).tolist()
        barycenter += ["mean", "mean", gram]
        barycenters.append(barycenter)

    for family in genealogy:
        ## create family mean barycenter
        # print("family", family, gram)
        data = [row[:5] for row in X if row[-2] == family and row[-3] == gram]
        data = [[float(item) for item in series] for series in data]

        data = [[el - sum(row) / len(row) for el in row] for row in data]

        if data != []:
            barycenter = dtw_barycenter_averaging(
                X=data, barycenter_size=BARYCENTER_SIZE,
    print("split no. %d" % split)
    X = []
    Y = []
    with open("../../ValSurprisals/" + args.corpus + '/' + args.gram + '/' + \
                args.language + "_training" + str(split) + ".csv", 'r') as f:
         reader = csv.reader(f)
         for row in reader:
             X.append(row[1:])

    with open("../../ValSurprisals/" + args.corpus + '/' + args.gram + '/' + \
                    args.language + "_test" + str(split) + ".csv", 'r') as f:
             reader = csv.reader(f)
             for row in reader:
                 Y.append(row[1:])

    # get barycenter for each size hyperparameter value as list
    for BARYCENTER_SIZE in range(1, 16):
        X = [[float(item) for item in series if item != "NA"] for series in X]
        Y = [[float(item) for item in series if item != "NA"] for series in Y]

        barycenter = dtw_barycenter_averaging(X = X, barycenter_size = BARYCENTER_SIZE)

        total_dtw_dist = 0
        for element in Y:
            total_dtw_dist += dtw(barycenter, element)

        compression_costs[split-1, BARYCENTER_SIZE-1] = total_dtw_dist

np.savetxt("costs/" + args.corpus + '/' + args.gram + '/' + \
            args.language + "_dtw_dists.csv", compression_costs, delimiter=",")
Пример #16
0
    # plot all points of the data set
    for series in X:
        plt.plot(series.ravel(), "k-", alpha=.2)
    # plot the given barycenter of them
    plt.plot(barycenter.ravel(), "r-", linewidth=2)


# plot the four variants with the same number of iterations and a tolerance of
# 1e-3 where applicable
ax1 = plt.subplot(4, 1, 1)
plt.title("Euclidean barycenter")
plot_helper(euclidean_barycenter(X))

# the remaining panels share their x axis with the first one
plt.subplot(4, 1, 2, sharex=ax1)
plt.title("DBA (vectorized version of Petitjean's EM)")
plot_helper(dtw_barycenter_averaging(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 3, sharex=ax1)
plt.title("DBA (subgradient descent approach)")
plot_helper(dtw_barycenter_averaging_subgradient(X, max_iter=50, tol=1e-3))

plt.subplot(4, 1, 4, sharex=ax1)
# raw string: "\g" is not a valid escape sequence in a normal literal
plt.title(r"Soft-DTW barycenter ($\gamma$=1.0)")
plot_helper(softdtw_barycenter(X, gamma=1., max_iter=50, tol=1e-3))

# clip the axes for better readability
ax1.set_xlim([0, length_of_sequence])

# show the plot(s)
plt.tight_layout()
plt.show()
Пример #17
0
def dtw_avg(class_x):
    """Compute one DBA barycenter per class and stack them into an array.

    Parameters
    ----------
    class_x : iterable
        One dataset of time series per class; each element is passed to
        ``dtw_barycenter_averaging`` with ``max_iter=100``.

    Returns
    -------
    numpy.ndarray
        ``np.array`` of the per-class barycenters, in input order.  An
        empty input yields an empty array of shape ``(0,)``.
    """
    # Iterate directly instead of pre-filling a placeholder list and
    # indexing it with range(len(...)).
    return np.array([
        dtw_barycenter_averaging(series_set, max_iter=100)
        for series_set in class_x
    ])