def two_blobs_clustering():
    """
    Cluster a two-blob dataset spectrally and plot the outcome.

    Used in questions 2.1 and 2.2. Draws a dataset with ``blobs``, builds a
    similarity graph and its 'unn' Laplacian, runs ``spectral_clustering``
    on it, and hands the labels to ``plot_clustering_result`` together with
    a k-means labelling of the raw points. Nothing is returned.
    """
    # Data; the number of clusters is the number of distinct labels.
    points, labels = blobs(600, n_blobs=2, blob_var=0.15, surplus=0)
    n_clusters = len(np.unique(labels))

    # Graph parameters: k = 3, var = 1.0 (exponential_euclidean's sigma^2).
    similarity = build_similarity_graph(points, var=1.0, k=3)
    laplacian = build_laplacian(similarity, 'unn')

    # Indices of the ordered eigenvalues to pick.
    eig_indices = [1, 2, 3]
    spectral_labels = spectral_clustering(laplacian,
                                          eig_indices,
                                          num_classes=n_clusters)

    # k-means on the raw points is computed after the spectral step,
    # matching the evaluation order of a direct call-argument expression.
    kmeans_labels = KMeans(n_clusters).fit_predict(points)
    plot_clustering_result(points, labels, laplacian, spectral_labels,
                           kmeans_labels)
예제 #2
0
def point_and_circle_clustering():
    """
    Compare two Laplacian normalizations on the point-and-circle dataset.

    Used in question 2.8. One similarity graph is built (k = 50, var = 1.0)
    and turned into an 'unn' and an 'rw' Laplacian. Each is clustered with
    ``spectral_clustering`` using the eigenvalue indices [0, 1], and both
    labelings are passed to ``plot_clustering_result``. Nothing is returned.
    """
    # Data; the number of clusters is the number of distinct labels.
    points, labels = point_and_circle(600)
    n_clusters = len(np.unique(labels))

    # var is exponential_euclidean's sigma^2.
    similarity = build_similarity_graph(points, var=1.0, k=50)

    # Indices of the ordered eigenvalues to pick (shared by both runs).
    eig_indices = [0, 1]

    # Both Laplacians are built first, then both clusterings are run,
    # 'unn' before 'rw' in each pass (dicts keep insertion order).
    laplacians = {
        norm: build_laplacian(similarity, norm)
        for norm in ('unn', 'rw')
    }
    clusterings = {
        norm: spectral_clustering(lap, eig_indices, num_classes=n_clusters)
        for norm, lap in laplacians.items()
    }

    # The trailing positional 1 is forwarded as-is; its meaning is defined
    # by plot_clustering_result.
    plot_clustering_result(points, labels, laplacians['unn'],
                           clusterings['unn'], clusterings['rw'], 1)
def two_moons_clustering():
    """
    Cluster the two-moons dataset spectrally and plot the outcome.

    Used in question 2.7. Builds a similarity graph (k = 3, var = 1.0) and
    its 'unn' Laplacian, runs ``spectral_clustering`` with the eigenvalue
    indices [1, 2, 3], and plots the labels next to a k-means labelling of
    the raw points. Nothing is returned.
    """
    # Data; the number of clusters is the number of distinct labels.
    points, labels = two_moons(600)
    n_clusters = len(np.unique(labels))

    # var is exponential_euclidean's sigma^2.
    similarity = build_similarity_graph(points, var=1.0, k=3)
    laplacian = build_laplacian(similarity, 'unn')

    # Indices of the ordered eigenvalues to pick.
    eig_indices = [1, 2, 3]
    spectral_labels = spectral_clustering(laplacian,
                                          eig_indices,
                                          num_classes=n_clusters)

    # Computed after the spectral step, as in a direct argument expression.
    kmeans_labels = KMeans(n_clusters).fit_predict(points)
    plot_clustering_result(points, labels, laplacian, spectral_labels,
                           kmeans_labels)
예제 #4
0
def point_and_circle_clustering():
    """
    Compare adaptive spectral clustering under two Laplacian normalizations.

    Used in question 2.8. Draws a point-and-circle dataset (sigma = 0.2),
    builds one similarity graph with k = 0, eps = 0.4 and var = 1, derives
    an 'unn' and an 'rw' Laplacian from it, clusters each with
    ``spectral_clustering_adaptive`` and plots both labelings.
    Nothing is returned.
    """
    # Data; the number of clusters is the number of distinct labels.
    points, labels = point_and_circle(600, sigma=.2)
    n_clusters = len(np.unique(labels))

    # var is exponential_euclidean's sigma^2 (kept as the integer 1).
    similarity = build_similarity_graph(points, var=1, eps=0.4, k=0)

    # Both Laplacians are built first, then both clusterings are run,
    # 'unn' before 'rw' in each pass (dicts keep insertion order).
    laplacians = {
        norm: build_laplacian(similarity, norm)
        for norm in ('unn', 'rw')
    }
    clusterings = {
        norm: spectral_clustering_adaptive(lap, num_classes=n_clusters)
        for norm, lap in laplacians.items()
    }

    # The trailing positional 1 is forwarded as-is; its meaning is defined
    # by plot_clustering_result.
    plot_clustering_result(points, labels, laplacians['unn'],
                           clusterings['unn'], clusterings['rw'], 1)
예제 #5
0
def two_moons_clustering():
    """
    Cluster the two-moons dataset with adaptive spectral clustering.

    Used in question 2.7. Builds a similarity graph with k = 0, eps = 0.8
    and var = 1.0, takes its 'rw' Laplacian, runs
    ``spectral_clustering_adaptive`` and plots the labels next to a k-means
    labelling of the raw points. Nothing is returned.
    """
    # Data; the number of clusters is the number of distinct labels.
    points, labels = two_moons(600)
    n_clusters = len(np.unique(labels))

    # var is exponential_euclidean's sigma^2.
    similarity = build_similarity_graph(points, var=1.0, eps=0.8, k=0)
    laplacian = build_laplacian(similarity, 'rw')

    adaptive_labels = spectral_clustering_adaptive(laplacian,
                                                   num_classes=n_clusters)

    # Computed after the spectral step, as in a direct argument expression.
    kmeans_labels = KMeans(n_clusters).fit_predict(points)
    plot_clustering_result(points, labels, laplacian, adaptive_labels,
                           kmeans_labels)
예제 #6
0
def point_and_circle_clustering(eig_max=15):
    """
    Compare 'unn' and 'sym' Laplacians on the point-and-circle dataset.

    Used in question 2.8. With k == 0 the eps passed to
    ``build_similarity_graph`` is derived from the data: it is the value
    exp(-d^2 / (2 * var)) where d is the largest pairwise distance among
    the entries flagged by ``min_span_tree``. Both Laplacians are then
    clustered with ``spectral_clustering`` on the eigenvalue indices [0, 1]
    and the two labelings are plotted. Nothing is returned.

    :param eig_max: currently unused; the eigenvalue indices are fixed to
        [0, 1] inside the function. Kept so existing callers keep working.
    """
    # Generate data and compute number of clusters
    X, Y = point_and_circle(600)
    num_classes = len(np.unique(Y))

    # Choose parameters
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    if k == 0:  # compute epsilon
        # dists[i, j] = euclidean distance between x_i and x_j
        dists = sd.cdist(X, X, 'euclidean')
        min_tree = min_span_tree(dists)

        # Largest distance among the entries where min_tree equals True.
        # The element-wise comparison selects the same entries as testing
        # each min_tree[i][j] == True in a Python loop, without visiting
        # every (i, j) pair in the interpreter or sorting the whole list.
        selected = np.equal(min_tree, True)
        distance_threshold = dists[selected].max()

        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        W = build_similarity_graph(X, var=var, eps=eps, k=k)
    else:
        W = build_similarity_graph(X, var=var, k=k)

    # build laplacians
    L_unn = build_laplacian(W, 'unn')
    L_norm = build_laplacian(W, 'sym')

    # indices of the ordered eigenvalues to pick
    chosen_eig_indices = [0, 1]

    Y_unn = spectral_clustering(L_unn,
                                chosen_eig_indices,
                                num_classes=num_classes)
    Y_norm = spectral_clustering(L_norm,
                                 chosen_eig_indices,
                                 num_classes=num_classes)

    # The trailing positional 1 is forwarded as-is; its meaning is defined
    # by plot_clustering_result.
    plot_clustering_result(X, Y, L_unn, Y_unn, Y_norm, 1)
예제 #7
0
def two_moons_clustering(eig_max=15):
    """
    Cluster the two-moons dataset, choosing the eigenvalues automatically.

    Used in question 2.7. With k == 0 the eps passed to
    ``build_similarity_graph`` is exp(-d^2 / (2 * var)), where d is the
    largest pairwise distance among the entries flagged by
    ``min_span_tree``. The sorted eigenvalues of the 'unn' Laplacian are
    given to ``choose_eigenvalues`` to obtain the indices used by
    ``spectral_clustering``; the result is plotted next to a k-means
    labelling of the raw points. Nothing is returned.

    :param eig_max: forwarded to ``choose_eigenvalues`` as ``eig_max``.
    """
    # Generate data and compute number of clusters
    X, Y = two_moons(600)
    num_classes = len(np.unique(Y))

    # Choose parameters
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'unn'

    if k == 0:  # compute epsilon
        # dists[i, j] = euclidean distance between x_i and x_j
        dists = sd.cdist(X, X, 'euclidean')
        min_tree = min_span_tree(dists)

        # Largest distance among the entries where min_tree equals True
        # (same selection as testing each min_tree[i][j] == True).
        selected = np.equal(min_tree, True)
        distance_threshold = dists[selected].max()

        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        W = build_similarity_graph(X, var=var, eps=eps, k=k)
    else:
        # eps is only defined in the k == 0 branch, so it must not be
        # passed here (doing so would raise NameError).
        W = build_similarity_graph(X, var=var, k=k)

    # build laplacian
    L = build_laplacian(W, laplacian_normalization)

    # Choose the eigenvalues: only the ascending eigenvalues are needed,
    # the eigenvectors returned by np.linalg.eig are discarded.
    eigenvalues, _ = np.linalg.eig(L)
    eigenvalues = eigenvalues[np.argsort(eigenvalues)]
    chosen_eig_indices = choose_eigenvalues(eigenvalues, eig_max=eig_max)

    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
예제 #8
0
def two_blobs_clustering():
    """
    Cluster a small two-blob dataset and plot both the graph and the result.

    Used in questions 2.1 and 2.2. With k == 0 the eps passed to
    ``build_similarity_graph`` is exp(-d^2 / (2 * var)), where d is the
    SECOND largest pairwise distance among the entries flagged by
    ``min_span_tree``. The graph is shown with ``plot_graph_matrix``, its
    'unn' Laplacian is clustered with ``spectral_clustering`` on the
    eigenvalue indices [0, 1, 2], and the labels are plotted next to a
    k-means labelling of the raw points. Nothing is returned.
    """
    # Get data and compute number of classes
    X, Y = blobs(50, n_blobs=2, blob_var=0.15, surplus=0)
    num_classes = len(np.unique(Y))

    # Choose parameters
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'unn'
    chosen_eig_indices = [0, 1, 2]  # indices of the ordered eigenvalues to pick

    if k == 0:  # compute epsilon
        # dists[i, j] = euclidean distance between x_i and x_j
        dists = sd.cdist(X, X, 'euclidean')
        min_tree = min_span_tree(dists)

        # Distances at the entries where min_tree equals True (same
        # selection as testing each min_tree[i][j] == True), ascending.
        selected = np.equal(min_tree, True)
        flagged_distances = np.sort(dists[selected])

        # Deliberately the second largest value, not the largest.
        # NOTE(review): if min_span_tree flags each edge twice (symmetric
        # matrix), this equals the largest value -- confirm against it.
        distance_threshold = flagged_distances[-2]

        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        W = build_similarity_graph(X, var=var, eps=eps, k=k)
    else:
        # eps is only defined in the k == 0 branch, so it must not be
        # passed here (doing so would raise NameError).
        W = build_similarity_graph(X, var=var, k=k)

    plot_graph_matrix(X, Y, W)

    # build laplacian
    L = build_laplacian(W, laplacian_normalization)

    # run spectral clustering
    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    # Plot results
    plot_clustering_result(X, Y, L, Y_rec, KMeans(num_classes).fit_predict(X))
예제 #9
0
def two_blobs_clustering():
    """
    Cluster a two-blob dataset in the configuration of question 2.1 or 2.2.

    The local ``question`` switch (fixed to '2.2') selects how the
    similarity graph is built and which eigenvalue indices are used:

    * '2.1': eps derived from the largest distance selected by the
      ``min_span_tree`` mask, eigenvalue index [1];
    * '2.2': k = 20, eigenvalue indices [0, 1].

    In both cases the 'rw' Laplacian is clustered with
    ``spectral_clustering`` and the labels are plotted next to a k-means
    labelling of the raw points. Nothing is returned.
    """
    question = '2.2'

    # Data, number of clusters (distinct labels) and number of samples.
    X, Y = blobs(600, n_blobs=2, blob_var=0.15, surplus=0)
    num_classes = len(np.unique(Y))
    n = X.shape[0]

    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'rw'

    if question == '2.1':
        # The graph has to be connected in this question, so an
        # epsilon-graph is constructed using a MST.
        # dists[i, j] = euclidean distance between x_i and x_j
        dists = pairwise_distances(X).reshape((n, n))
        min_tree = min_span_tree(dists)
        distance_threshold = dists[min_tree].max()
        graph_options = {'eps': np.exp(-distance_threshold**2 / (2 * var))}
        chosen_eig_indices = [1]
    elif question == '2.2':
        # k-nn graph with k = 20.
        graph_options = {'k': 20}
        chosen_eig_indices = [0, 1]

    # Similarity graph and Laplacian for the selected configuration.
    W = build_similarity_graph(X, var=var, **graph_options)
    L = build_laplacian(W, laplacian_normalization)

    Y_rec = spectral_clustering(L, chosen_eig_indices, num_classes=num_classes)

    kmeans_labels = KMeans(num_classes).fit_predict(X)
    plot_clustering_result(X, Y, L, Y_rec, kmeans_labels)
예제 #10
0
def main():
    """
    Run three clusterings on a generated "blobs" dataset and plot each one.

    k-means and DBSCAN are plotted against a Gaussian-kernel affinity of
    the pairwise distances; the eigenvector-based method is run on, and
    plotted against, a second Gaussian-kernel affinity built with
    ``mult=0.05``. Nothing is returned.
    """
    points = generate_dataset(shape="blobs")
    distances = pairwise_distances(points)  # euclidean distance metric
    affinity = gaussian_kernel(distances, is_sym=True)  # Gaussian affinity

    # --- k-means: second return value is not used ---
    kmeans_clusters, _ = apply_kmeans(points)
    plot_clustering_result(points, affinity, kmeans_clusters,
                           clustering_name="K means clustering")

    # --- DBSCAN: works from the distance matrix ---
    dbscan_clusters, dbscan_noise = apply_dbscan(points, distances)
    plot_clustering_result(points, affinity, dbscan_clusters, dbscan_noise,
                           clustering_name="DBSCAN clustering")

    # --- eigenvector based clustering: uses its own affinity matrix ---
    eigen_affinity = gaussian_kernel(distances, mult=0.05, is_sym=True)
    eigen_clusters, eigen_noise = apply_eigenvector_based(
        points, eigen_affinity)
    plot_clustering_result(points, eigen_affinity, eigen_clusters,
                           eigen_noise,
                           clustering_name="Eigenvector based clustering")
예제 #11
0
def two_moons_clustering():
    """
    Cluster the two-moons dataset with adaptive spectral clustering.

    Used in question 2.7. With k == 0 the eps passed to
    ``build_similarity_graph`` is exp(-d^2 / (2 * var)), where d is the
    largest pairwise distance selected by the ``min_span_tree`` mask; the
    value of eps is printed. The 'unn' Laplacian is clustered with
    ``spectral_clustering_adaptive`` and the labels are plotted next to a
    k-means labelling of the raw points. Nothing is returned.
    """
    # Generate data and compute number of clusters
    X, Y = two_moons(600)
    num_classes = len(np.unique(Y))

    # Choose parameters
    k = 0
    var = 1.0  # exponential_euclidean's sigma^2
    laplacian_normalization = 'unn'

    # build similarity graph
    if k == 0:
        dists = sd.cdist(X, X, metric="euclidean")
        min_tree = min_span_tree(dists)
        distance_threshold = dists[min_tree].max()
        eps = np.exp(-distance_threshold**2.0 / (2 * var))
        print(eps)  # diagnostic output of the derived threshold
        W = build_similarity_graph(X, var=var, k=k, eps=eps)
    else:
        W = build_similarity_graph(X, var=var, k=k)

    # build laplacian
    L = build_laplacian(W, laplacian_normalization)

    Y_rec_adaptive = spectral_clustering_adaptive(L, num_classes=num_classes)

    plot_clustering_result(X, Y, L, Y_rec_adaptive,
                           KMeans(num_classes).fit_predict(X))
예제 #12
0
def two_blobs_clustering():
    """
    Cluster a two-blob dataset spectrally and plot the outcome.

    Used in questions 2.1 and 2.2. With k == 0 the eps passed to
    ``build_similarity_graph`` is exp(-d^2 / (2 * var)), where d is the
    largest pairwise distance selected by the ``min_span_tree`` mask. The
    'unn' Laplacian is clustered with ``spectral_clustering`` on the
    eigenvalue indices [0, 1] and the labels are plotted next to a k-means
    labelling of the raw points. Nothing is returned.
    """
    # Data; the number of clusters is the number of distinct labels.
    points, labels = blobs(600, n_blobs=2, blob_var=0.15, surplus=0)
    n_clusters = len(np.unique(labels))

    k = 0
    var = 1.0  # exponential_euclidean's sigma^2

    # Keyword arguments for the similarity graph; eps is added only when
    # it is derived from the data (k == 0).
    graph_kwargs = {'var': var, 'k': k}
    if k == 0:
        pairwise = sd.cdist(points, points, metric="euclidean")
        tree_mask = min_span_tree(pairwise)
        longest = pairwise[tree_mask].max()
        graph_kwargs['eps'] = np.exp(-longest**2.0 / (2 * var))

    similarity = build_similarity_graph(points, **graph_kwargs)
    laplacian = build_laplacian(similarity, 'unn')

    # Indices of the ordered eigenvalues to pick.
    eig_indices = [0, 1]
    spectral_labels = spectral_clustering(laplacian,
                                          eig_indices,
                                          num_classes=n_clusters)

    # Computed after the spectral step, as in a direct argument expression.
    kmeans_labels = KMeans(n_clusters).fit_predict(points)
    plot_clustering_result(points, labels, laplacian, spectral_labels,
                           kmeans_labels)