Exemplo n.º 1
0
def point_and_circle_clustering():
    """
    Run spectral clustering on the point-and-circle data and plot the outcome.

    600 samples are drawn with ``point_and_circle`` and turned into a
    similarity graph (``k = 50``, ``var = 1.0``). The ``'unn'`` and ``'rw'``
    Laplacians of that graph are each passed to ``spectral_clustering`` with
    eigenvalue indices ``[0, 1]``; both labelings go to
    ``plot_clustering_result`` alongside the ground-truth labels.

    Used in question 2.8
    """
    # Generate the data; the number of clusters is the number of distinct
    # ground-truth labels.
    samples, truth = point_and_circle(600)
    n_clusters = len(np.unique(truth))

    # Chosen parameters.
    graph_kwargs = {
        'var': 1.0,  # exponential_euclidean's sigma^2
        'k': 50,
    }
    eig_indices = [0, 1]  # indices of the ordered eigenvalues to pick

    similarity = build_similarity_graph(samples, **graph_kwargs)

    # Both Laplacians are built before any clustering is run, and the
    # clustering calls follow in the same ('unn', then 'rw') order.
    laplacian_unn, laplacian_rw = (
        build_laplacian(similarity, kind) for kind in ('unn', 'rw')
    )
    labels_unn, labels_rw = (
        spectral_clustering(lap, eig_indices, num_classes=n_clusters)
        for lap in (laplacian_unn, laplacian_rw)
    )

    plot_clustering_result(samples, truth, laplacian_unn,
                           labels_unn, labels_rw, 1)
Exemplo n.º 2
0
def point_and_circle_clustering():
    """
    Cluster the point-and-circle data with adaptive spectral clustering.

    Draws 600 samples via ``point_and_circle`` (``sigma=.2``), builds a
    similarity graph with ``eps = 0.4`` and ``k = 0``, then feeds the
    ``'unn'`` and ``'rw'`` Laplacians to ``spectral_clustering_adaptive``.
    The two labelings are plotted next to the ground truth with
    ``plot_clustering_result``.

    Used in question 2.8
    """
    # Data and number of clusters (distinct ground-truth labels).
    points, labels_true = point_and_circle(600, sigma=.2)
    n_clusters = len(np.unique(labels_true))

    # Chosen parameters.
    neighbours = 0
    threshold = 0.4
    sigma2 = 1  # exponential_euclidean's sigma^2

    # Similarity graph and its two Laplacians.
    graph = build_similarity_graph(
        points, var=sigma2, eps=threshold, k=neighbours
    )
    lap_unn = build_laplacian(graph, 'unn')
    lap_rw = build_laplacian(graph, 'rw')

    # Cluster each Laplacian; the adaptive routine only needs the number
    # of classes.
    pred_unn = spectral_clustering_adaptive(lap_unn, num_classes=n_clusters)
    pred_rw = spectral_clustering_adaptive(lap_rw, num_classes=n_clusters)

    plot_clustering_result(points, labels_true, lap_unn, pred_unn, pred_rw, 1)
Exemplo n.º 3
0
def point_and_circle_clustering(eig_max=15):
    """
    Cluster the point-and-circle data with spectral clustering and plot it.

    600 samples are drawn with ``point_and_circle``. With ``k == 0`` (the
    value set below) the similarity graph is thresholded at
    ``eps = exp(-d**2 / (2 * var))``, where ``d`` is the largest pairwise
    Euclidean distance flagged by ``min_span_tree``; otherwise the graph is
    built from ``k`` alone. The ``'unn'`` and ``'sym'`` Laplacians are then
    clustered with eigenvalue indices ``[0, 1]`` and the results plotted
    with ``plot_clustering_result``.

    Used in question 2.8

    :param eig_max: currently unused. It was only referenced by a disabled
                    adaptive eigenvalue selection (``choose_eigenvalues``)
                    and is kept so existing callers keep working.
    """
    # Generate data and compute number of clusters
    X, Y = point_and_circle(600)
    num_classes = len(np.unique(Y))

    # Chosen parameters
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    if k == 0:  # compute epsilon
        # dists[i, j] = euclidean distance between x_i and x_j
        dists = sd.cdist(X, X, 'euclidean')
        min_tree = min_span_tree(dists)

        # Longest distance among the entries flagged by min_tree. The
        # elementwise comparison with True mirrors a per-entry `== True`
        # test, so a 0/1 integer mask is handled like a boolean one.
        mst_mask = np.equal(min_tree, True)
        distance_threshold = np.max(dists[mst_mask])

        # Similarity value corresponding to that distance.
        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        W = build_similarity_graph(X, var=var, eps=eps, k=k)
    else:
        W = build_similarity_graph(X, var=var, k=k)

    # build laplacian
    L_unn = build_laplacian(W, 'unn')
    L_norm = build_laplacian(W, 'sym')

    # indices of the ordered eigenvalues to pick (fixed, not derived from
    # eig_max)
    chosen_eig_indices = [0, 1]

    Y_unn = spectral_clustering(L_unn,
                                chosen_eig_indices,
                                num_classes=num_classes)
    Y_norm = spectral_clustering(L_norm,
                                 chosen_eig_indices,
                                 num_classes=num_classes)

    plot_clustering_result(X, Y, L_unn, Y_unn, Y_norm, 1)
Exemplo n.º 4
0
    distance_threshold = np.max(dists[min_tree])
    eps = np.exp(-distance_threshold**2 / (2 * var))
    """
    use the build_similarity_graph function to build the graph W  
     W: (n x n) dimensional matrix representing                    
        the adjacency matrix of the graph
       use plot_graph_matrix to plot the graph                    
    """
    W = build_similarity_graph(X, var=var, eps=eps, k=0)
    plot_graph_matrix(X, Y, W)


if __name__ == '__main__':
    # Script entry point: build an epsilon graph and a k-NN graph on the
    # two-moons sample and plot both.
    n = 300

    # Every generator is called, although only the two-moons sample is
    # used below.
    blobs_data, blobs_clusters = blobs(n)
    moons_data, moons_clusters = two_moons(n)
    point_circle_data, point_circle_clusters = point_and_circle(n)
    worst_blobs_data, worst_blobs_clusters = worst_case_blob(n, 1.0)

    var = 1

    X = moons_data
    Y = moons_clusters
    n_samples = len(X)

    # Pairwise distances, forced to an (n_samples, n_samples) layout.
    dists = pairwise_distances(X).reshape(n_samples, n_samples)
    min_tree = min_span_tree(dists)

    # Similarity value of the largest distance selected by min_tree.
    # NOTE(review): this eps is computed but the epsilon graph below is
    # built with the literal 0.6 - confirm which value is intended.
    eps = np.exp(-dists[min_tree].max()**2 / (2 * var))

    W_eps = build_similarity_graph(X, var=var, eps=0.6)
    W_knn = build_similarity_graph(X, k=15)

    # Plot the epsilon graph first, then the k-NN graph.
    plot_graph_matrix(X, Y, W_eps)
    plot_graph_matrix(X, Y, W_knn)