from collections import defaultdict

import numpy as np
from django.conf import settings
from scikits.learn.cluster import KMeans

# `google_TSP` and `PointGeoTag` are project-level helpers assumed to be
# importable from the surrounding application.


def cluster_objects(objects, optimize_within_clusters=False,
                    round_trip=False, initial=None):
    """
    Return a list of objects clustered by geographical position.

    :param objects: The list of objects or a queryset. The objects must be
        instances of PointGeoTag or implement
        `get_point_coordinates(self, as_string=False, inverted=False)`
        to obtain the coordinates.
    :param optimize_within_clusters: a boolean specifying whether each
        cluster should be ordered along a (near-)optimal route.
    :returns: A list of clusters. Example: [[<p1>, <p2>], [<p3>, <p4>, <p5>]]
    """
    # Keep only objects that actually have coordinates, so the rows of X
    # stay aligned with the objects we later assign to clusters.
    located = [obj for obj in objects
               if obj.get_point_coordinates(as_string=False, inverted=True)]
    X = np.array([list(obj.get_point_coordinates(as_string=False,
                                                 inverted=True))
                  for obj in located])

    # Affinity propagation could determine the number of clusters
    # automatically:
    # X_norms = np.sum(X*X, axis=1)
    # S = - X_norms[:,np.newaxis] - X_norms[np.newaxis,:] + 2 * np.dot(X, X.T)
    # p = 10*np.median(S)
    # af = AffinityPropagation()
    # af.fit(S, p)
    # n_clusters_ = len(af.cluster_centers_indices_)

    n_items = len(X)
    max_items = getattr(settings, 'ITEMS_PER_BUCKET', 10) - 1
    # Ceiling division: no bucket may exceed max_items, and we always need
    # at least one cluster (the old floor division produced n_clusters=0
    # whenever n_items < max_items, which crashed KMeans).
    n_clusters = max(1, -(-n_items // max_items))

    # KMeans is the right tool when the number of clusters is fixed upfront.
    km = KMeans(k=n_clusters, init='k-means++')
    km.fit(X)

    cluster_dict = defaultdict(list)
    for i, cluster_id in enumerate(km.labels_):
        cluster_dict[cluster_id].append(located[i])
    clusters = cluster_dict.values()

    if optimize_within_clusters:
        if initial:
            result = []
            for cluster in clusters:
                if initial in cluster:
                    # Route the cluster holding the starting point first,
                    # with the starting point at its head.
                    cluster.remove(initial)
                    cluster.insert(0, initial)
                    result.insert(0, google_TSP(cluster,
                                                round_trip=round_trip))
                else:
                    result.append(google_TSP(cluster, round_trip=round_trip))
            return result
        else:
            return [google_TSP(cluster, round_trip=round_trip)
                    for cluster in clusters]
    return clusters
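# A minimal usage sketch for cluster_objects. `Stop` and its coordinates are
# hypothetical stand-ins for any PointGeoTag-like object; a configured Django
# settings module is assumed, as in the application above.

class Stop(object):
    def __init__(self, name, lat, lon):
        self.name, self.lat, self.lon = name, lat, lon

    def get_point_coordinates(self, as_string=False, inverted=False):
        # inverted=True yields (lon, lat), the ordering the clusterer expects.
        return (self.lon, self.lat) if inverted else (self.lat, self.lon)

stops = [Stop('a', 40.42, -3.70), Stop('b', 40.45, -3.69),
         Stop('c', 41.39, 2.17)]
for bucket in cluster_objects(stops):
    print [s.name for s in bucket]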
import numpy as np
from scikits.learn.cluster import KMeans


def cluster_centroids(x, k=32, max_iter=300, km_kwargs=None):
    """Return centroids ordered by their euclidean norm."""
    # Pass max_iter through (it was previously hardcoded to 300) and avoid
    # a mutable default argument for km_kwargs.
    km = KMeans(k, init='k-means++', max_iter=max_iter, **(km_kwargs or {}))
    trained = km.fit(x)
    centroids = trained.cluster_centers_
    # Sort rows by vector norm so the ordering is deterministic across runs.
    ind = np.argsort(np.linalg.norm(centroids, axis=1))
    return centroids[ind]
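# A quick sanity check for cluster_centroids, reusing the imports above.
# Two well-separated blobs should yield two centroids, with the one nearer
# the origin listed first; the data here is illustrative only.

if __name__ == '__main__':
    rng = np.random.RandomState(0)
    blob_a = rng.randn(100, 2)              # centered near (0, 0)
    blob_b = rng.randn(100, 2) + [10, 10]   # centered near (10, 10)
    cents = cluster_centroids(np.vstack([blob_a, blob_b]), k=2)
    print cents  # first row ~ (0, 0), second row ~ (10, 10)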
import time

import numpy as np
from sklearn.cluster import KMeans, MiniBatchKMeans
from sklearn.datasets.samples_generator import make_blobs

##############################################################################
# Generate sample data
np.random.seed(0)

batch_size = 45
centers = [[1, 1], [-1, -1], [1, -1]]
n_clusters = len(centers)
X, labels_true = make_blobs(n_samples=1200, centers=centers, cluster_std=0.7)

##############################################################################
# Compute clustering with KMeans
k_means = KMeans(init='k-means++', k=3)
t0 = time.time()
k_means.fit(X)
t_batch = time.time() - t0
k_means_labels = k_means.labels_
k_means_cluster_centers = k_means.cluster_centers_
k_means_labels_unique = np.unique(k_means_labels)

##############################################################################
# Compute clustering with MiniBatchKMeans
mbk = MiniBatchKMeans(init='k-means++', k=3, chunk_size=batch_size)
t0 = time.time()
mbk.fit(X)
t_mini_batch = time.time() - t0
mbk_means_labels = mbk.labels_
mbk_means_cluster_centers = mbk.cluster_centers_
mbk_means_labels_unique = np.unique(mbk_means_labels)
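##############################################################################
# A small, optional comparison of the two fits (a sketch, not part of the
# original example): report wall-clock time and how far each mini-batch
# center drifted from its nearest full-batch counterpart.
print 'KMeans fit in %.3fs, MiniBatchKMeans fit in %.3fs' % (
    t_batch, t_mini_batch)
for center in mbk_means_cluster_centers:
    dists = np.sqrt(((k_means_cluster_centers - center) ** 2).sum(axis=1))
    print 'mini-batch center %s is %.3f from the nearest full-batch one' % (
        center, dists.min())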
import numpy as np
from scikits.learn.cluster import KMeans

n_points_per_cluster = 250
n_clusters = 3
n_points = n_points_per_cluster * n_clusters
means = np.array([[1, 1], [-1, -1], [1, -1]])
std = .6

X = np.empty((0, 2))
for i in range(n_clusters):
    X = np.r_[X, means[i] + std * np.random.randn(n_points_per_cluster, 2)]

################################################################################
# Compute clustering with KMeans
km = KMeans(init='k-means++', k=3, n_init=1)
km.fit(X)
labels = km.labels_
cluster_centers = km.cluster_centers_

labels_unique = np.unique(labels)
n_clusters_ = len(labels_unique)
print "number of estimated clusters : %d" % n_clusters_

################################################################################
# Plot result
import pylab as pl
from itertools import cycle

pl.figure(1)
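# The fragment above stops right after pl.figure(1). A plausible
# continuation, in the style of the classic clustering examples, colors each
# cluster and marks its center; this is a sketch, not the original code.
pl.clf()
colors = cycle('bgrcmyk')
for j, col in zip(range(n_clusters_), colors):
    members = labels == j
    center = cluster_centers[j]
    pl.plot(X[members, 0], X[members, 1], col + '.')
    pl.plot(center[0], center[1], 'o', markerfacecolor=col,
            markeredgecolor='k', markersize=14)
pl.title('Estimated number of clusters: %d' % n_clusters_)
pl.show()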
# Fragment: `time`, `data`, `n_samples`, `SelfOrganizingMap` and `pseudo_F`
# are defined earlier in the original script.
print

################################################################################
# Digits dataset clustering using Self-Organizing Map
print "Self-Organizing Map "
t0 = time()
grid_width = 4
som = SelfOrganizingMap(size=grid_width,
                        n_iterations=n_samples * 5,
                        learning_rate=1)
som.fit(data)
print "done in %0.3fs" % (time() - t0)
print

F = pseudo_F(data, som.labels_, som.neurons_)
print 'pseudo_F %0.2f | %0.2f%%' % (F, 100 * (F / (1 + F)))
print

################################################################################
# Digits dataset clustering using KMeans
print "KMeans "
t0 = time()
km = KMeans(init='k-means++', k=grid_width ** 2, n_init=10)
km.fit(data)
print "done in %0.3fs" % (time() - t0)
print

F = pseudo_F(data, km.labels_, km.cluster_centers_)
print 'pseudo_F %0.2f | %0.2f%%' % (F, 100 * (F / (1 + F)))
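################################################################################
# `pseudo_F` is defined elsewhere in the original script. Assuming it is the
# Calinski-Harabasz pseudo-F statistic (between-cluster dispersion over
# within-cluster dispersion, scaled by degrees of freedom), a minimal sketch:
import numpy as np

def pseudo_F_sketch(X, labels, centroids):
    # Accept SOM neuron grids as well by flattening to (k, n_features).
    centroids = np.asarray(centroids).reshape(-1, X.shape[1])
    labels = np.asarray(labels).ravel()
    mean = X.mean(axis=0)
    n, k = X.shape[0], centroids.shape[0]
    between = sum(np.sum(labels == j) * np.sum((centroids[j] - mean) ** 2)
                  for j in range(k))
    within = sum(np.sum((X[labels == j] - centroids[j]) ** 2)
                 for j in range(k))
    return (between / (k - 1)) / (within / (n - k))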
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

from __future__ import division

import logging
import pickle

import numpy as np
from scikits.learn.cluster import KMeans

logging.basicConfig(level=logging.DEBUG)

LEARN_SIZE = 100
K = 8
ITER = 10

# Each pickle maps file names to descriptor arrays; stack the first
# LEARN_SIZE arrays of each class into one training matrix. (Renamed the
# loop variables to stop shadowing the builtins `file` and `all`, and open
# the pickles in binary mode.)
moto, plane = [pickle.load(open(name, 'rb')) for name in ['moto', 'plane']]
logging.info('Data loaded')
m = np.vstack([v for f, v in moto.items()[:LEARN_SIZE]])
p = np.vstack([v for f, v in plane.items()[:LEARN_SIZE]])
data = np.vstack([m, p])

km = KMeans(k=K, max_iter=ITER)
km.fit(data)

filename = 'centroids_%d_%d_%d' % (LEARN_SIZE, K, ITER)
pickle.dump(km.cluster_centers_, open(filename, 'wb'))
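# A sketch of how the saved vocabulary might be used later: reload the
# centroids and quantize a descriptor matrix into a bag-of-visual-words
# histogram. The file name reuses `filename` from the dump above; the rest
# is an illustrative assumption, not part of the original script.
centroids = pickle.load(open(filename, 'rb'))

def bag_of_words(descriptors, centroids):
    # Squared distance from every descriptor to every centroid, then count
    # how many descriptors fall into each vocabulary cell.
    d2 = ((descriptors[:, np.newaxis, :] -
           centroids[np.newaxis, :, :]) ** 2).sum(axis=2)
    words = d2.argmin(axis=1)
    hist = np.zeros(len(centroids), dtype=int)
    for w in words:
        hist[w] += 1
    return hist

print bag_of_words(m[:50], centroids)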