Пример #1
0
def main(tseries_fpath, k, plot_foldpath):
    '''
    Clusters the time series stored in a text file and writes centroid
    plots plus the clustering results to a folder.

    The file is loaded with `np.genfromtxt`; its first column is discarded
    and every remaining row is treated as one time series. For each centroid
    returned by `ksc.inc_ksc` three PDF files are saved: the centroid as is,
    the centroid rolled so that its maximum lands on the middle position and
    the centroid rolled so that its minimum is the first point. The four
    arrays returned by `ksc.inc_ksc` are then stored as text files.

    Arguments
    ---------
    tseries_fpath: str
        Path of the text file holding the time series
    k:
        Forwarded to `ksc.inc_ksc` (presumably the number of clusters)
    plot_foldpath: str
        Folder that receives the `.pdf` plots and the `.dat` files
    '''
    import mkl
    mkl.set_num_threads(16)

    initialize_matplotlib()

    X = np.genfromtxt(tseries_fpath)[:, 1:]
    # Rows whose values sum to zero receive a small offset
    zero_rows = np.where(X.sum(axis=1) == 0)[0]
    X[zero_rows] += .001
    X = X.copy()

    cent, assign, shifts, dists_cent = ksc.inc_ksc(X, k)

    def save_curve(curve, fname):
        # Black line, both axes hidden, one figure per output file
        plt.plot(curve, '-k')
        axes = plt.gca()
        axes.get_xaxis().set_visible(False)
        plt.gca().get_yaxis().set_visible(False)
        plt.savefig(os.path.join(plot_foldpath, fname))
        plt.close()

    for i in range(cent.shape[0]):
        centroid = cent[i]

        save_curve(centroid, '%d.pdf' % i)

        # Roll the centroid so that its peak sits at the middle position
        middle = centroid.shape[0] // 2
        peak_centered = dist.shift(centroid, middle - np.argmax(centroid),
                                   rolling=True)
        save_curve(peak_centered, '%d-peak-center.pdf' % i)

        # Roll the centroid so that its minimum becomes the first point
        min_first = dist.shift(centroid, 0 - np.argmin(centroid),
                               rolling=True)
        save_curve(min_first, '%d-min-first.pdf' % i)

    outputs = (
        ('cents.dat', cent, '%.5f'),
        ('assign.dat', assign, '%d'),
        ('shift.dat', shifts, '%d'),
        ('dists_cent.dat', dists_cent, '%.5f'),
    )
    for fname, data, fmt in outputs:
        np.savetxt(os.path.join(plot_foldpath, fname), data, fmt=fmt)
Пример #2
0
def plot_series(t_series, plot_foldpath, name, shift=False):
    '''
    Plots one time series as a black line and saves the figure as
    `<name>.png` inside `plot_foldpath`.

    Arguments
    ---------
    t_series: array
        The time series to plot
    plot_foldpath: str
        Folder where the image is written
    name:
        Base name of the output file (formatted with `%s`)
    shift (optional): bool
        When true, the series is rolled with `dist.shift` so that its
        minimum value becomes the first point before plotting.
    '''
    if shift:
        offset = 0 - np.argmin(t_series)
        curve = dist.shift(t_series, offset, rolling=True)
    else:
        curve = t_series

    plt.plot(curve, '-k')
    plt.ylabel('Views')
    plt.xlabel('Time')
    plt.savefig(os.path.join(plot_foldpath, '%s.png' % name))
    plt.close()
Пример #3
0
def plot_series(t_series, plot_foldpath, name, shift=False):
    '''
    Saves a line plot of a time series to `<name>.png` in `plot_foldpath`.

    The curve is drawn in black with the y axis labelled 'Views' and the
    x axis labelled 'Time'. If `shift` is true the series is first rolled
    (via `dist.shift` with `rolling=True`) by minus the index of its
    minimum, which places the minimum at the first position.
    '''
    # Only compute the roll when it was requested
    series = (dist.shift(t_series, 0 - np.argmin(t_series), rolling=True)
              if shift else t_series)

    plt.plot(series, '-k')
    plt.ylabel('Views')
    plt.xlabel('Time')
    target = os.path.join(plot_foldpath, '%s.png' % name)
    plt.savefig(target)
    plt.close()
Пример #4
0
def _compute_centroids(tseries, assign, num_clusters, to_shift=None):
    '''
    Given a time series matrix and cluster assignments, this method will
    compute the spectral centroids for each cluster.

    Arguments
    ---------
    tseries: matrix (n_series, n_points)
        Time series being clustered
    assign: array of ints (size = n_series)
        The cluster assignment for each time series
    num_clusters: int
        The number of clusters being searched for
    to_shift (optional): array of ints (size = n_series)
        Determines if time series should be shifted, if different from `None`.
        In this case, each series will be shifted by the corresponding amount
        in the array.

    Returns
    -------
    centroids: matrix (num_clusters, n_points)
        One centroid per row. A cluster without members, or whose members
        are all zero, gets an all-zero centroid.

    Notes
    -----
    When `to_shift` is given, the rows of `tseries` are overwritten in place
    with their shifted versions (no copy is made).
    '''

    series_size = tseries.shape[1]
    centroids = np.ndarray((num_clusters, series_size))

    #shift series for best centroid distance
    #TODO: this method can be cythonized and done in parallel
    #NOTE(review): `shifted` aliases `tseries`, so the caller's matrix is
    #modified. Confirm whether callers rely on this before making a copy.
    shifted = tseries
    if to_shift is not None:
        #`range` (not `xrange`) so the loop runs on both Python 2 and 3
        for i in range(tseries.shape[0]):
            shifted[i] = shift(tseries[i], to_shift[i], rolling=True)

    #compute centroids
    for k in range(num_clusters):
        members = shifted[assign == k]
        if members.any():
            if members.ndim == 2:
                axis = 1
                num_members = members.shape[0]
            else:
                axis = 0
                num_members = 1

            #squared norm of every member, repeated so that it lines up
            #element-wise with `members` after the transpose below
            ssqs = np.tile(np.sum(members**2, axis=axis), (series_size, 1))
            #the original paper divides by ssqs only, while the author's
            #example implementation uses sqrt. We chose sqrt because it appears
            #to yield better centroids.
            aux = members / np.sqrt(ssqs.T)

            x_mat = np.dot(aux.T, aux)
            i_mat = num_members * np.eye(series_size)
            m_mat = i_mat - x_mat

            #only the eigenvector of the smallest eigenvalue is needed.
            #`LA` is presumably scipy.linalg: recent releases select it with
            #`subset_by_index`, older ones only know the `eigvals` keyword,
            #which shows up as a TypeError for the unknown argument.
            try:
                _, eig_vectors = LA.eigh(m_mat, subset_by_index=[0, 0])
            except TypeError:
                _, eig_vectors = LA.eigh(m_mat, eigvals=(0, 0))
            centroids[k] = eig_vectors[:, 0]

            #the sign of an eigenvector is arbitrary; keep the orientation
            #whose elements have a non-negative sum
            if centroids[k].sum() < 0:
                centroids[k] = -centroids[k]
        else:
            centroids[k] = np.zeros(series_size)

    return centroids
Пример #5
0
    def test_shift_drop(self):
        '''
        Checks `dist.shift` with rolling disabled: values pushed past either
        end are expected to vanish and vacated positions to hold zeros.
        '''
        # (shift amount, expected result); amounts 1 and -2 are checked twice
        single = np.array([1.0])
        single_cases = [
            (0, [1.0]),
            (1, [0.0]),
            (1, [0.0]),
            (-2, [0.0]),
            (-2, [0.0]),
        ]
        for amount, expected in single_cases:
            assert_array_equal(np.array(expected),
                               dist.shift(single, amount, False))

        # Shifts of the series length or more are expected to leave only zeros
        quad = np.array([1.0, 2.0, 3.0, 4.0])
        quad_cases = [
            (0, [1.0, 2.0, 3.0, 4.0]),
            (1, [0.0, 1.0, 2.0, 3.0]),
            (-1, [2.0, 3.0, 4.0, 0.0]),
            (2, [0.0, 0.0, 1.0, 2.0]),
            (-2, [3.0, 4.0, 0.0, 0.0]),
            (3, [0.0, 0.0, 0.0, 1.0]),
            (-3, [4.0, 0.0, 0.0, 0.0]),
            (4, [0.0, 0.0, 0.0, 0.0]),
            (-4, [0.0, 0.0, 0.0, 0.0]),
            (5, [0.0, 0.0, 0.0, 0.0]),
            (-5, [0.0, 0.0, 0.0, 0.0]),
            (50, [0.0, 0.0, 0.0, 0.0]),
            (-50, [0.0, 0.0, 0.0, 0.0]),
        ]
        for amount, expected in quad_cases:
            assert_array_equal(np.array(expected),
                               dist.shift(quad, amount, False))
Пример #6
0
    def test_shift_roll(self):
        '''
        Checks `dist.shift` in rolling mode: values leaving one end are
        expected to re-enter at the other end, so the amount acts modulo
        the series length.
        '''
        # An empty series stays empty; here the third argument is omitted
        nothing = np.array([])
        for amount in (0, -1, 1, 10, -10):
            assert_array_equal(np.array([]), dist.shift(nothing, amount))

        # A single point is unaffected (amounts 1 and -2 are checked twice)
        single = np.array([1.0])
        for amount in (0, 1, 1, -2, -2):
            assert_array_equal(np.array([1.0]),
                               dist.shift(single, amount, True))

        # (shift amount, expected result) for a four point series
        quad = np.array([1.0, 2.0, 3.0, 4.0])
        quad_cases = [
            (0, [1.0, 2.0, 3.0, 4.0]),
            (1, [4.0, 1.0, 2.0, 3.0]),
            (-1, [2.0, 3.0, 4.0, 1.0]),
            (2, [3.0, 4.0, 1.0, 2.0]),
            (-2, [3.0, 4.0, 1.0, 2.0]),
            (3, [2.0, 3.0, 4.0, 1.0]),
            (-3, [4.0, 1.0, 2.0, 3.0]),
            (4, [1.0, 2.0, 3.0, 4.0]),
            (-4, [1.0, 2.0, 3.0, 4.0]),
            (5, [4.0, 1.0, 2.0, 3.0]),
            (-5, [2.0, 3.0, 4.0, 1.0]),
            (8, [1.0, 2.0, 3.0, 4.0]),
            (-8, [1.0, 2.0, 3.0, 4.0]),
        ]
        for amount, expected in quad_cases:
            assert_array_equal(np.array(expected),
                               dist.shift(quad, amount, True))
Пример #7
0
def _compute_centroids(tseries, assign, num_clusters, to_shift=None):
    '''
    Given a time series matrix and cluster assignments, this method will
    compute the spectral centroids for each cluster.

    Arguments
    ---------
    tseries: matrix (n_series, n_points)
        Time series being clustered
    assign: array of ints (size = n_series)
        The cluster assignment for each time series
    num_clusters: int
        The number of clusters being searched for
    to_shift (optional): array of ints (size = n_series)
        Determines if time series should be shifted, if different from `None`.
        In this case, each series will be shifted by the corresponding amount
        in the array.

    Returns
    -------
    centroids: matrix (num_clusters, n_points)
        One centroid per row. A cluster without members, or whose members
        are all zero, gets an all-zero centroid.

    Notes
    -----
    When `to_shift` is given, the rows of `tseries` are overwritten in place
    with their shifted versions (no copy is made).
    '''

    series_size = tseries.shape[1]
    centroids = np.ndarray((num_clusters, series_size))

    #shift series for best centroid distance
    #TODO: this method can be cythonized and done in parallel
    #NOTE(review): `shifted` aliases `tseries`, so the caller's matrix is
    #modified. Confirm whether callers rely on this before making a copy.
    shifted = tseries
    if to_shift is not None:
        for i in range(tseries.shape[0]):
            shifted[i] = shift(tseries[i], to_shift[i], rolling=True)

    #compute centroids
    for k in range(num_clusters):
        members = shifted[assign == k]
        if members.any():
            if members.ndim == 2:
                axis = 1
                num_members = members.shape[0]
            else:
                axis = 0
                num_members = 1

            #squared norm of every member, repeated so that it lines up
            #element-wise with `members` after the transpose below
            ssqs = np.tile(np.sum(members**2, axis=axis), (series_size, 1))
            #the original paper divides by ssqs only, while the author's
            #example implementation uses sqrt. We chose sqrt because it appears
            #to yield better centroids.
            aux = members / np.sqrt(ssqs.T)

            x_mat = np.dot(aux.T, aux)
            i_mat = num_members * np.eye(series_size)
            m_mat = i_mat - x_mat

            #only the eigenvector of the smallest eigenvalue is needed.
            #`LA` is presumably scipy.linalg: recent releases select it with
            #`subset_by_index`, older ones only know the `eigvals` keyword,
            #which shows up as a TypeError for the unknown argument.
            try:
                _, eig_vectors = LA.eigh(m_mat, subset_by_index=[0, 0])
            except TypeError:
                _, eig_vectors = LA.eigh(m_mat, eigvals=(0, 0))
            centroids[k] = eig_vectors[:, 0]

            #the sign of an eigenvector is arbitrary; keep the orientation
            #whose elements have a non-negative sum
            if centroids[k].sum() < 0:
                centroids[k] = -centroids[k]
        else:
            centroids[k] = np.zeros(series_size)

    return centroids
Пример #8
0
    def test_shift_drop(self):
        '''
        Exercises `dist.shift` with rolling turned off. Displaced values are
        expected to be discarded and the freed slots filled with zeros.
        '''
        def check(series, amount, expected):
            # Expected array first, shifted result second
            assert_array_equal(np.array(expected),
                               dist.shift(series, amount, False))

        one = np.array([1.0])
        check(one, 0, [1.0])
        # Any non-zero shift empties a one point series
        for amount in (1, 1, -2, -2):
            check(one, amount, [0.0])

        four = np.array([1.0, 2.0, 3.0, 4.0])
        check(four, 0, [1.0, 2.0, 3.0, 4.0])

        check(four, 1, [0.0, 1.0, 2.0, 3.0])
        check(four, -1, [2.0, 3.0, 4.0, 0.0])

        check(four, 2, [0.0, 0.0, 1.0, 2.0])
        check(four, -2, [3.0, 4.0, 0.0, 0.0])

        check(four, 3, [0.0, 0.0, 0.0, 1.0])
        check(four, -3, [4.0, 0.0, 0.0, 0.0])

        # Shifting by the series length or more leaves only zeros
        for amount in (4, -4, 5, -5, 50, -50):
            check(four, amount, [0.0, 0.0, 0.0, 0.0])
Пример #9
0
    def test_shift_roll(self):
        '''
        Exercises `dist.shift` with rolling turned on. Values that leave one
        end of the series are expected to wrap around to the other end.
        '''
        def check(series, amount, expected):
            # Expected array first, rolled result second
            assert_array_equal(np.array(expected),
                               dist.shift(series, amount, True))

        # Empty input: called without the third argument, result stays empty
        empty = np.array([])
        for amount in (0, -1, 1, 10, -10):
            assert_array_equal(np.array([]), dist.shift(empty, amount))

        # A one point series is the same under every roll
        one = np.array([1.0])
        for amount in (0, 1, 1, -2, -2):
            check(one, amount, [1.0])

        four = np.array([1.0, 2.0, 3.0, 4.0])
        check(four, 0, [1.0, 2.0, 3.0, 4.0])

        check(four, 1, [4.0, 1.0, 2.0, 3.0])
        check(four, -1, [2.0, 3.0, 4.0, 1.])

        check(four, 2, [3.0, 4.0, 1.0, 2.0])
        check(four, -2, [3.0, 4.0, 1.0, 2.0])

        check(four, 3, [2.0, 3.0, 4.0, 1.0])
        check(four, -3, [4.0, 1.0, 2.0, 3.0])

        # Multiples of the length give back the original order
        check(four, 4, [1.0, 2.0, 3.0, 4.0])
        check(four, -4, [1.0, 2.0, 3.0, 4.0])

        check(four, 5, [4.0, 1.0, 2.0, 3.0])
        check(four, -5, [2.0, 3.0, 4.0, 1.])

        check(four, 8, [1.0, 2.0, 3.0, 4.0])
        check(four, -8, [1.0, 2.0, 3.0, 4.0])