Example #1
from sklearn.cluster import DBSCAN

from util import plot_util, preprocessing_util, benchmark, collection_reader

if __name__ == "__main__":
    # Read data
    books = collection_reader.read_books_from_mongo()
    documents = collection_reader.extract_corpus(books)
    print("{} books:".format(len(documents)))
    print([book["book_id3"] for book in books])
    print()

    # Create term-document representation
    X = preprocessing_util.convert_to_term_document(documents,
                                                    min_df=0.1,
                                                    max_df=0.9)

    # SVD
    X = preprocessing_util.apply_svd(X, min(X.shape))

    ###############################################################################
    # Do the actual clustering
    print("Clustering data")
    k = 4  # unused by DBSCAN; the number of clusters is determined by eps and min_samples

    method = DBSCAN(eps=0.8, min_samples=1).fit(X)

    # Metrics
    benchmark.clustering_metrics(X, method.labels_)

    # Create a 3d scatter plot of the corpus
    plot_util.create_3d_plot_for_sparse_matrix(X, method.labels_)
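Note: the helpers in the project's util package are not shown on this page. Below is a minimal sketch of what convert_to_term_document and apply_svd might look like, assuming a TF-IDF term-document matrix and scikit-learn's TruncatedSVD; the real implementations may differ.

# Sketch of the assumed preprocessing helpers (not part of the original examples).
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD


def convert_to_term_document(documents, min_df=0.1, max_df=0.9):
    # Build a TF-IDF term-document matrix, dropping terms that occur in fewer
    # than min_df or more than max_df of the documents.
    vectorizer = TfidfVectorizer(min_df=min_df, max_df=max_df)
    return vectorizer.fit_transform(documents)


def apply_svd(X, n_components):
    # Reduce the term-document matrix to n_components latent dimensions (LSA).
    svd = TruncatedSVD(n_components=n_components)
    return svd.fit_transform(X)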
Example #2

from time import time

from sklearn.cluster import AgglomerativeClustering, KMeans
from sklearn.metrics.pairwise import cosine_similarity

from util import plot_util, preprocessing_util, benchmark, collection_reader

if __name__ == "__main__":
    # Read data
    books = collection_reader.read_books_from_mongo()
    documents = collection_reader.extract_corpus(books)
    print("{} books:".format(len(documents)))
    print([book["book_id3"] for book in books])
    print()

    # Create term-document representation
    X = preprocessing_util.convert_to_term_document(documents,
                                                    min_df=0.1,
                                                    max_df=0.9)

    # SVD
    X = preprocessing_util.apply_svd(X, min(X.shape))

    # Cosine similarity matrix
    dist = 1 - cosine_similarity(X)

    ###############################################################################
    # Do the actual clustering (hierarchical / agglomerative)
    k = 5

    # linkage: ward, average, complete
    # affinity: cosine, euclidean, cityblock
    ac = AgglomerativeClustering(linkage="average",
                                 n_clusters=k,
                                 affinity="cosine")

    print("Clustering sparse data with {}".format(ac))
    t0 = time()
    ac.fit(dist)
    print("done in {}".format(time() - t0))
    print()

    # Metrics
    benchmark.clustering_metrics(X, ac.labels_)

    # Create a 3d scatter plot of the corpus
    plot_util.create_3d_plot_for_sparse_matrix(X, ac.labels_)
    ###############################################################################
    # Do the actual clustering (KMeans)
    k = 5

    import matplotlib.pyplot as plt

    the_metrics = []
    for i in range(1):  # single run; raise the range to compare several KMeans initialisations
        km = KMeans(n_clusters=k, verbose=False)
        #print("Clustering sparse data with {}".format(km))
        #t0 = time()
        km.fit(X)
        #print("done in %0.3fs" % (time() - t0))
        #print()

        # Metrics
        the_metrics.append(benchmark.clustering_metrics(X, km.labels_))

        # Plot:
        # create_2d_plot_for_sparse_matrix(X, km.labels_)
        plot_util.create_3d_plot_for_sparse_matrix(X, km.labels_, block=False)

    print("All the metrics: ")
    for a in the_metrics:
        print(a)

    plt.show()
    print("Done!")

Example #4
from sklearn.cluster import Birch

from util import plot_util, preprocessing_util, benchmark, collection_reader

if __name__ == "__main__":
    # Read data
    books = collection_reader.read_books_from_mongo()
    documents = collection_reader.extract_corpus(books)
    print("{} books:".format(len(documents)))
    print([book["book_id3"] for book in books])
    print()

    # Create term-document representation
    X = preprocessing_util.convert_to_term_document(documents,
                                                    min_df=0.1,
                                                    max_df=0.9)

    # SVD
    X = preprocessing_util.apply_svd(X, min(X.shape))

    ###############################################################################
    # Do the actual clustering
    print("Clustering data")
    k = 5

    model = Birch(branching_factor=8,
                  n_clusters=k,
                  threshold=0.5,
                  compute_labels=True)
    model.fit(X)

    # Metrics
    benchmark.clustering_metrics(X, model.predict(X))

    # Create a 3d scatter plot of the corpus
    plot_util.create_3d_plot_for_sparse_matrix(X, model.predict(X))
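benchmark.clustering_metrics is likewise project-local. A minimal sketch of such a helper, assuming it prints and returns a few internal validation scores from scikit-learn; the real project may report different metrics.

# Sketch of an assumed clustering_metrics helper (not from the original project).
from sklearn import metrics


def clustering_metrics(X, labels):
    # Internal validation scores: higher silhouette / Calinski-Harabasz is better,
    # lower Davies-Bouldin is better.
    scores = {
        "silhouette": metrics.silhouette_score(X, labels),
        "calinski_harabasz": metrics.calinski_harabasz_score(X, labels),
        "davies_bouldin": metrics.davies_bouldin_score(X, labels),
    }
    for name, value in scores.items():
        print("{}: {:.3f}".format(name, value))
    return scores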
Example #5

from sklearn import mixture

from util import plot_util, preprocessing_util, benchmark, collection_reader

if __name__ == "__main__":
    # Read data
    books = collection_reader.read_books_from_mongo()
    documents = collection_reader.extract_corpus(books)
    print("{} books:".format(len(documents)))
    print([book["book_id3"] for book in books])
    print()

    # Create term-document representation
    X = preprocessing_util.convert_to_term_document(documents, min_df=0.1, max_df=0.9)

    # SVD
    X = preprocessing_util.apply_svd(X, min(X.shape))

    ###############################################################################
    # Do the actual clustering
    print("Clustering data")
    k = 5

    method = mixture.GaussianMixture(n_components=k, covariance_type='full').fit(X)

    # Metrics
    benchmark.clustering_metrics(X, method.predict(X))

    # Create a 3d scatter plot of the corpus
    plot_util.create_3d_plot_for_sparse_matrix(X, method.predict(X))
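plot_util.create_3d_plot_for_sparse_matrix is also part of the project's util package. A rough sketch, assuming it scatters the first three SVD components coloured by cluster label; the original helper may differ.

# Sketch of an assumed 3D plotting helper (not from the original project).
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (registers the "3d" projection)


def create_3d_plot_for_sparse_matrix(X, labels, block=True):
    # Scatter the first three (SVD) components, coloured by cluster label.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection="3d")
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=labels)
    ax.set_xlabel("component 1")
    ax.set_ylabel("component 2")
    ax.set_zlabel("component 3")
    plt.show(block=block)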