Python cluster示例

编程语言: Python

命名空间/包名称: hypertools

方法/功能: cluster

hotexamples.com的示例: 5

Python cluster - 已找到5个示例。以下是从开源项目中提取的、评价最高的 hypertools.cluster 真实 Python 用法示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： test_cluster.py 项目： jeremymanning/hypertools

def test_discrete_clusters():
    """Check that every discrete clustering model yields near-pure clusters.

    Each model name is passed to ``hyp.cluster`` twice: once with the
    ``clusters`` data and once with the list ``[cluster1, cluster2]``. The
    returned labels are compared against ``true_labels`` (all of these names
    come from the enclosing test module).
    """

    def homogeneity_test(estimates, truth, threshold=0.95):
        """Assert each estimated cluster is dominated by true label 0 or 1.

        For every unique estimated label except ``-1`` (presumably the
        "noise" label of density-based models -- confirm), at least
        ``threshold`` of the member rows must share the same true label.
        """
        assigned = estimates.values
        for label in np.unique(assigned):
            if label != -1:
                members = np.where(assigned == label)[0]
                n_members = len(members)
                if n_members == 0:
                    continue

                actual = truth.iloc[members].values
                share_zero = (actual == 0).sum() / n_members
                share_one = (actual == 1).sum() / n_members

                assert max(share_zero, share_one) >= threshold

    model_names = [
        'AffinityPropagation',
        'AgglomerativeClustering',
        'Birch',
        'DBSCAN',
        'OPTICS',
        'FeatureAgglomeration',
        'KMeans',
        'MiniBatchKMeans',
        'MeanShift',
        'SpectralClustering',
    ]

    for name in model_names:
        # Single dataset: one set of labels for all observations.
        stacked_labels = hyp.cluster(clusters, model=name)
        homogeneity_test(stacked_labels, true_labels)

        # List of datasets: one set of labels per dataset, so the true labels
        # are split at the boundary between cluster1 and cluster2.
        per_dataset_labels = hyp.cluster([cluster1, cluster2], model=name)
        boundary = cluster1.shape[0]
        homogeneity_test(per_dataset_labels[0], true_labels.iloc[:boundary])
        homogeneity_test(per_dataset_labels[1], true_labels.iloc[boundary:])

示例#2

显示文件

def cluster(x, n_clusters=5):
    """Split the rows of ``x`` into groups by their KMeans cluster label.

    Parameters
    ----------
    x : pandas.DataFrame
        Per the original author's note: one row per video and one column per
        timepoint/topic (e.g. the result of ``np.ravel(x0.values).T``).
        Rows are selected positionally with ``x.iloc``.
    n_clusters : int, optional
        Number of clusters requested from ``hyp.cluster`` (default 5).

    Returns
    -------
    clusters : list
        One copied slice of ``x`` per unique label, in the order produced by
        ``np.unique``. Per the original author's note, ``clusters[0]`` holds
        the cluster-0 videos (videos by timepoints*topics) and
        ``clusters[0].iloc[0]`` is the reshaped trajectory of the first video
        in the first cluster.
    clustered_labels
        The labels exactly as returned by ``hyp.cluster``.
    """
    # Forward the caller's n_clusters; the original hard-coded 5 here and
    # silently ignored the parameter.
    clustered_labels = hyp.cluster(x, cluster='KMeans', n_clusters=n_clusters)

    # Compare against an ndarray: if hyp.cluster returns a plain list,
    # `list == k` evaluates to a single bool and np.where selects no rows.
    label_array = np.asarray(clustered_labels)

    clusters = [
        x.iloc[np.where(label_array == k)[0]].copy()
        for k in np.unique(label_array)
    ]
    return clusters, clustered_labels

示例#3

显示文件

文件： test_cluster.py 项目： jeremymanning/hypertools

def test_cluster_mixture():
    """Check the mixture proportions returned for mixture-model clustering.

    For each mixture model, ``hyp.cluster`` is called on ``clusters`` (defined
    in the enclosing test module) in ``'fit_predict_proba'`` mode, and the
    result is checked to look like per-observation membership probabilities
    over ``n_components`` components.
    """
    n_components = 3
    mode = 'fit_predict_proba'
    models = ['GaussianMixture', 'BayesianGaussianMixture']

    for m in models:
        next_model = {'model': m, 'args': [], 'kwargs': {'n_components': n_components}}
        mixture_proportions = hyp.cluster(clusters, model=next_model, mode=mode)

        # One row per observation, one column per component. Uses
        # n_components rather than a literal so the check cannot drift from
        # the value passed to the model.
        assert mixture_proportions.shape == (clusters.shape[0], n_components)
        # Every component receives some weight from at least one observation.
        assert np.all(np.sum(np.abs(mixture_proportions), axis=0) > 0)
        # Each entry lies in [0, 1] ...
        assert np.all(mixture_proportions >= 0)
        assert np.all(mixture_proportions <= 1)
        # ... and each row sums to 1.
        assert np.allclose(np.sum(mixture_proportions, axis=1), 1)

示例#4

显示文件

文件： plot_clusters2.py 项目： mutlay/hypertools

# -*- coding: utf-8 -*-
"""
=============================
Using the cluster function to label clusters
=============================

Here is an example where we generate some synthetic data, and then use the
cluster function to get cluster labels, which we can then pass to the `group`
kwarg to color our points by cluster.
"""

# Code source: Andrew Heusser
# License: MIT

# import
import hypertools as hyp
import numpy as np
from scipy.stats import multivariate_normal

# Simulate two 3-D Gaussian clusters of 100 points each, both with identity
# covariance: the first centred at the origin, the second at (3, 3, 3).
cluster1 = np.random.multivariate_normal(mean=np.zeros(3), cov=np.eye(3), size=100)
cluster2 = np.random.multivariate_normal(mean=np.full(3, 3.0), cov=np.eye(3), size=100)

# Stack the clusters row-wise into one (200, 3) array.
data = np.concatenate((cluster1, cluster2), axis=0)

# Ask hypertools for cluster labels, requesting two clusters.
cluster_labels = hyp.cluster(data, n_clusters=2)

# Plot the points as dots, grouped (coloured) by the labels found above.
hyp.plot(data, '.', group=cluster_labels)

示例#5

显示文件

文件： hypertools.py 项目： thileepanp/Clustering

# Dimensionality-reduction models tried in turn. The trailing notes are the
# original author's observations from running each one on this dataset.
reduce_models = [
    'FastICA',
    'FactorAnalysis',
    'TruncatedSVD',  # same results as PCA
    'DictionaryLearning',  # took a long time to run
    'MiniBatchDictionaryLearning',
    'TSNE',  # takes a long time to run
    'Isomap',  # memory error
    'SpectralEmbedding',  # system hangs
    'LocallyLinearEmbedding',
    'MDS',  # memory error
]
for reduce_model in reduce_models:
    geo = hyp.plot(data, '.', reduce=reduce_model)

# PCA with whitening, specified as a model/params dictionary.
pca_whitened = {'model': 'PCA', 'params': {'whiten': True}}
geo = hyp.plot(data, '.', reduce=pca_whitened)

# Draw 10000 row positions at random and cluster that subset with Birch.
sample_rows = np.random.choice(len(data), 10000)
training_set = data.iloc[sample_rows, :]
birch = hyp.cluster(training_set, cluster='Birch')
# NOTE(review): this relies on the object returned by hyp.cluster exposing
# an .apply method -- confirm against the installed hypertools version.
all_birch = birch.apply(data)

geo_cluster = hyp.plot(training_set, '.', cluster='HDBSCAN', n_clusters=6)

# Clustering

geo_cluster = hyp.plot(data, '.', n_clusters=6)

# Same plot with 8 clusters for each named model; the original author noted
# a memory error for 'AgglomerativeClustering'.
for cluster_model in ('KMeans', 'MiniBatchKMeans', 'AgglomerativeClustering'):
    geo_cluster = hyp.plot(data, '.', cluster=cluster_model, n_clusters=8)
geo_cluster = hyp.plot(
    data, '.', cluster='Birch', n_clusters=8