Example #1
    def k_means_clust(self, data, num_iter, progress=False, w=100, euclidean=True):
        '''
        k-means clustering algorithm for time series data. Dynamic time warping
        with Euclidean distance is used as the default similarity measure.
        '''
        self.centroids = random.sample(data, self.num_clust)

        for n in range(num_iter):
            if progress:
                print('iteration ' + str(n + 1))
            # assign data points to clusters
            self.assignments = {}
            # assignments has the following structure:
            # {
            #   centroid_idx: [idxs of time series that are nearest the centroid]
            # }
            for ind, i in enumerate(data):
                # ind is the data series number, i is the data series
                # define the minimum distance for the data series
                min_dist = float('inf')
                # define the index of the closest centroid
                closest_clust = None
                for c_ind, j in enumerate(self.centroids):
                    # c_ind is the index of the centroid, j is the centroid
                    if dtw.lb_keogh_onQuery(i, j, w, euclidean) < min_dist:
                        print('lb less than min dist -- calculating full dtw')
                        # could use windowed dtw here instead to speed things up
                        cur_dist = dtw.dist_dtw(i, j, euclidean)
                        if cur_dist < min_dist:
                            min_dist = cur_dist
                            closest_clust = c_ind
                # add the index of the current data series to a cluster
                if closest_clust in self.assignments:
                    self.assignments[closest_clust].append(ind)
                else:
                    self.assignments[closest_clust] = [ind]

            # now that we've updated the clusters that each series is part of,
            # we recalculate the centroids of the clusters
            for key in self.assignments:
                # key is the index of a centroid in the centroids list
                clust_sum = 0
                for k in self.assignments[key]:
                    # k is the index of a time series in the current cluster
                    # add the time series to the cluster sum
                    clust_sum = clust_sum + data[k]
                # update the centroid to the element-wise mean of all time series
                # currently assigned to this cluster
                self.centroids[key] = [m / len(self.assignments[key]) for m in clust_sum]
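In the loop above, dtw.lb_keogh_onQuery acts as a cheap lower bound that lets the code skip the full DTW computation for most centroids. The library's implementation isn't shown here; the sketch below is the textbook LB_Keogh bound it is assumed to compute, for equal-length 1-D series and a symmetric window of half-width w. Whether the library returns the rooted or the raw accumulated value is also an assumption; for pruning it only matters that the bound and dist_dtw are on the same scale.

def lb_keogh_sketch(query, candidate, w, euclidean=True):
    """Textbook LB_Keogh lower bound; an illustrative stand-in for dtw.lb_keogh_onQuery."""
    total = 0.0
    for i, q in enumerate(query):
        # envelope of the candidate inside the window around position i
        segment = candidate[max(i - w, 0):i + w + 1]
        lo, hi = min(segment), max(segment)
        if q > hi:        # query point above the upper envelope
            diff = q - hi
        elif q < lo:      # query point below the lower envelope
            diff = lo - q
        else:             # inside the envelope: contributes nothing
            continue
        total += diff * diff if euclidean else diff
    return total ** 0.5 if euclidean else total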
Example #2
def dtw(query, subject, squared=True):
    """Unconstrained Euclidean-flavoured DTW."""
    return ldtw.dist_dtw(ldtw.TimeSeries(query), ldtw.TimeSeries(subject), squared)
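For intuition, here is a minimal sketch of the dynamic-programming recurrence that ldtw.dist_dtw is assumed to implement: squared point costs when squared=True, absolute differences otherwise, accumulated along the optimal warping path. This is an illustration of the technique, not the ldtw implementation.

import numpy as np

def dist_dtw_sketch(query, subject, squared=True):
    """Plain O(n*m) DTW over two 1-D sequences; illustrative only."""
    n, m = len(query), len(subject)
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = query[i - 1] - subject[j - 1]
            d = d * d if squared else abs(d)
            cost[i, j] = d + min(cost[i - 1, j],       # advance in query only
                                 cost[i, j - 1],       # advance in subject only
                                 cost[i - 1, j - 1])   # advance in both
    return cost[n, m]

# e.g. a pure shift is absorbed by warping:
# dist_dtw_sketch([0, 1, 2, 3], [0, 0, 1, 2, 3]) == 0.0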
Example #3
# run both Euclidean and Manhattan modes
# (assumes: import pylab as pl for plotting, the dtw extension module,
#  and two 1-D sequences named query and subject)
for mode, name in [(True, 'Euclidean'), (False, 'Manhattan')]:

    # calculate the naive L_p-norm distance between the raw series
    print(dtw.dist_euclidean(query, subject)
          if mode else
          dtw.dist_manhatten(query, subject))
    
    # calculate a series of windowed (constrained) DTWs;
    # the cost should decrease monotonically as the window widens
    for window in range(0, max(len(subject), len(query)), 32):
        gamma = dtw.WarpingPath()
        print(dtw.dist_cdtw_backtrace(query, subject, window, gamma, mode))
        pl.plot(*zip(*[node[::-1] for node in gamma]))

    # calculate the full (unconstrained) DTW
    print(dtw.dist_dtw(query, subject, mode))

    # plot objective function H(i,j) := |query(i)-subject(j)|
    pl.imshow([[abs(query[i]-subject[j]) for j in range(len(subject))] 
                                         for i in range(len(query))], aspect="auto")
    pl.title(name)
    pl.show()

    # draw an explicit alignment using gamma, the warping path left over
    # from the last (widest-window) constrained DTW above
    pl.plot(query)
    pl.plot(subject)
    
    for i, j in gamma[0:len(gamma):4]:
        pl.plot([i, j], [query[i], subject[j]], c="grey")
    
    pl.show()
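The windowed loop above notes that the error should decrease monotonically: widening the Sakoe-Chiba band only admits more warping paths, so the optimal constrained cost can never increase, and the unconstrained DTW bounds them all from below. A small sketch of that check, reusing the same calls and variables as the snippet (so it carries the same assumptions about the dtw module):

costs = []
for window in range(0, max(len(subject), len(query)), 32):
    gamma = dtw.WarpingPath()
    costs.append(dtw.dist_cdtw_backtrace(query, subject, window, gamma, mode))

# every widening of the band may only lower (or keep) the optimal cost,
# and the full DTW is a lower bound on all constrained variants
assert all(a >= b for a, b in zip(costs, costs[1:]))
assert costs[-1] >= dtw.dist_dtw(query, subject, mode)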