def fit(self, samples):
        """
        Cluster ``samples`` with the spectral clustering algorithm of Ng et al. 2002.

        The affinity matrix is obtained from ``self._affinity_matrix()`` (scaling
        parameter as described in Zelnik-Manor et al. 2005) and the final
        clustering step uses the custom simple KMeans.

        Side effects: stores ``samples`` on ``self.samples`` and its length on
        ``self.samples_len`` before the affinity matrix is computed.

        :param samples: data samples to cluster
        :return: the result of ``KMeans.fit`` on the spectral embedding
            (labels associated with cluster)
        """
        self.samples = samples
        self.samples_len = len(samples)

        # Compute affinity matrix (A)
        affinity = self._affinity_matrix()

        # Inverse square root of the diagonal degree matrix (D): D^-1/2.
        # The degrees are summed over axis 0; this equals the row sums only
        # for a symmetric A (assumed here -- TODO confirm in _affinity_matrix).
        d = np.diag(np.power(np.sum(affinity, axis=0), -1 / 2))

        # Normalized affinity matrix L = D^-1/2 . A . D^-1/2
        laplacian = d @ affinity @ d

        # Eigenvectors of L stacked as the columns of a matrix (X).
        # L is symmetric when A is, so the symmetric solver is used: unlike
        # `eigs`, `eigsh` returns real-valued eigenvectors. which='LA' selects
        # the k algebraically largest eigenvalues, as Ng et al. prescribe,
        # instead of the largest-magnitude ones (which may be negative).
        _, eig_vecs = sp.sparse.linalg.eigsh(laplacian, k=self.k, which='LA')

        # Scale every row of X to unit length (X / sum(X^2)^1/2), giving the
        # embedded representation of each data sample (Y)
        row_norms = np.linalg.norm(eig_vecs, axis=1, keepdims=True)
        normalized_eig_vecs = eig_vecs / row_norms

        # Fit a KMeans algorithm to Y and receive cluster labels
        kmeans = KMeans(k=self.k)

        return kmeans.fit(normalized_eig_vecs)
# Example #2
0
import numpy as np
import matplotlib.pyplot as plt

from unsupervised.kmeans import KMeans

# Fit KMeans on uniformly random points for k = 1..9, print each inertia
# and plot inertia against k.
n_samples, n_features = 100, 2

X = np.random.rand(n_samples, n_features)

# The sweep stops at k = 9 rather than going all the way up to len(X).
cluster_counts = range(1, 10)
inertias = []

for k in cluster_counts:
    kmeans = KMeans(k=k)
    kmeans.fit(X)

    inertia = kmeans.inertia_
    print(inertia)
    inertias.append(inertia)

# One inertia was recorded per k, so the x axis is exactly the swept range.
plt.figure(figsize=(10, 20))
plt.plot(cluster_counts, inertias)
plt.show()
# Example #3
0
    parser.add_argument('--center', type=int, help='Number of data centers.', default=3)
    parser.add_argument('--random_state', type=int, help='Random state for data generation.', default=42)
    parser.add_argument('--n_samples', type=int, help='Number of data points.', default=5000)
    args = parser.parse_args()

    # Setting parameters
    max_iterations = args.max_iter
    n_centers = args.center
    n_samples = args.n_samples
    random_state = args.random_state

    # Create the clusters
    X, y = make_blobs(n_samples=n_samples, centers=n_centers, n_features=2, random_state=random_state, cluster_std=1.5)

    # Clustering
    kmeans = KMeans(k=n_centers, iterations=max_iterations, random_state=random_state, track_history=True)
    kmeans.fit(X)

    # Extract centroids
    centroids = kmeans.history_centroids

    # Create decision boundary data
    h = .1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    area_data = np.c_[xx.ravel(), yy.ravel()]

    # Prepare predictions
    predicted_labels = []
    predicted_area = []