import numpy
import pylab
from unsupervised.kmeans import KMeans

if __name__ == "__main__":
    numpy.random.seed(1)
    X = numpy.vstack((numpy.random.randn(10000, 2)*0.3,
                      numpy.random.randn(10000, 2)*0.3 + numpy.ones(2)))

    estimator = KMeans(2, 200, 10)
    estimator.fit(X)
    print(estimator.C_)
    print(estimator.v)
    Y = estimator.predict(X)
    print(Y)

    pylab.plot(X[:, 0], X[:, 1], "o")
    pylab.plot([estimator.C_[0, 0]], [estimator.C_[0, 1]], "o")
    pylab.plot([estimator.C_[1, 0]], [estimator.C_[1, 1]], "o")
    pylab.show()

if __name__ == "__main__":
    numpy.random.seed(0)

    # load_mnist is not defined in this snippet; it is expected to return the
    # images and the labels of the requested MNIST split.
    train_images, T = load_mnist("training", 60000)
    test_images, T2 = load_mnist("testing", 10000)
    print("Dataset loaded")

    train_cluster = train_images[:10000]
    train_classifier = train_images
    label_classifier = T
    n_filters = 196
    estimator = KMeans(n_filters=n_filters, batch_size=1000, n_iterations=10)
    estimator.fit(train_cluster)
    X = estimator.predict(train_classifier)
    X2 = estimator.predict(test_images)
    X_mean = X.mean(axis=0)
    X_std = X.std(axis=0) + 1e-8
    X = scale_features(X, X_mean, X_std)
    X2 = scale_features(X2, X_mean, X_std)
    print "Transformed datasets"

    test_classifier(X, label_classifier, X2, T2)

    pylab.figure()
    pylab.subplots_adjust(wspace=0.0, hspace=0.0)
    n_cells = numpy.min((int(numpy.sqrt(n_filters)), 10))
    for i in range(n_cells**2):
        pylab.subplot(n_cells, n_cells, i + 1)
        pylab.imshow(estimator.C_[i].reshape(28, 28),
                     cmap=pylab.cm.gray, interpolation="nearest")
        pylab.xticks(())
        pylab.yticks(())
    pylab.show()

    # K-means on random patches from natural images; `images` is assumed to be
    # an (n_samples, n_rows, n_cols) array of grayscale images loaded earlier.
    # extract_patches_2d is assumed to be scikit-learn's implementation.
    from sklearn.feature_extraction.image import extract_patches_2d

    def normalize_data(data):
        # Centre and truncate at +/- 3 standard deviations, then rescale into
        # [0.1, 0.9]; the first two statements are assumed, since the head of
        # this helper is missing from the snippet.
        data = data - data.mean()
        pstd = 3 * data.std()
        data = numpy.fmax(numpy.fmin(data, pstd), -pstd) / pstd
        data = (data + 1) * 0.4 + 0.1
        return data
    images = normalize_data(images)

    patch_width = 8
    n_filters = 25

    n_samples, n_rows, n_cols = images.shape
    n_features = n_rows * n_cols
    patches = [extract_patches_2d(images[i], (patch_width, patch_width),
                                  max_patches=1000, random_state=i)
            for i in range(n_samples)]
    patches = numpy.array(patches).reshape(-1, patch_width * patch_width)
    print("Dataset consists of %d samples" % n_samples)

    estimator = KMeans(n_filters=n_filters, batch_size=1000, n_iterations=200)
    estimator.fit(patches)
    print(estimator.predict(patches))

    pylab.figure()
    for i in range(estimator.C_.shape[0]):
        rows = max(int(numpy.sqrt(n_filters)), 2)
        cols = max(int(numpy.sqrt(n_filters)), 2)
        pylab.subplot(rows, cols, i + 1)
        pylab.imshow(estimator.C_[i].reshape(patch_width, patch_width),
                     cmap=pylab.cm.gray, interpolation="nearest")
        pylab.xticks(())
        pylab.yticks(())
    pylab.show()
Example #4
    # This fragment assumes numpy (np), matplotlib.pyplot (plt) and
    # matplotlib.animation.FuncAnimation are imported, and that X, n_centers,
    # max_iterations and random_state are defined earlier in the example.

    # Clustering
    kmeans = KMeans(k=n_centers, iterations=max_iterations, random_state=random_state, track_history=True)
    kmeans.fit(X)

    # Extract centroids
    centroids = kmeans.history_centroids

    # Create decision boundary data
    h = 0.1  # step size of the mesh used for the decision areas
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    area_data = np.c_[xx.ravel(), yy.ravel()]

    # Prepare predictions
    predicted_labels = []
    predicted_area = []
    for iteration in range(max_iterations):
        kmeans.centroids = centroids[iteration]
        area = np.array(kmeans.predict(area_data))
        area = area.reshape(xx.shape)
        predicted_labels.append(kmeans.predict(X))
        predicted_area.append(area)

    # Plotting and showing the animation.
    fig, ax = plt.subplots(figsize=(15, 6), dpi=80)
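    # `update` is referenced by FuncAnimation below but not defined in this
    # fragment; a minimal sketch, assuming each frame redraws the decision
    # areas, the points coloured by their current assignments, and the
    # centroids of that iteration.
    def update(iteration):
        ax.clear()
        ax.contourf(xx, yy, predicted_area[iteration], alpha=0.3)
        ax.scatter(X[:, 0], X[:, 1], c=predicted_labels[iteration], s=10)
        ax.scatter(centroids[iteration][:, 0], centroids[iteration][:, 1],
                   c="red", marker="x", s=100)
        ax.set_title("Iteration %d" % (iteration + 1))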
    animation = FuncAnimation(fig, update, frames=max_iterations, interval=800, repeat=False)
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.show()