def predict(self, X):
    """Assign each sample in X to its closest cluster.

    The estimator must already be fitted; the input is validated before
    the labels are computed against ``self.cluster_centers_``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data to predict.

    Returns
    -------
    labels : array, shape [n_samples,]
        Index of the cluster each sample belongs to.
    """
    self._check_fitted()
    validated = self._check_test_data(X)
    # The helper returns (labels, score); only the labels are needed here.
    labels_and_score = _compute_labels_and_score(
        validated, self.cluster_centers_)
    return labels_and_score[0]
def score(self, X):
    """Return the negated K-means objective evaluated on X.

    The estimator must already be fitted; the input is validated before
    the objective is computed against ``self.cluster_centers_``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        New data.

    Returns
    -------
    score : float
        Opposite of the value of X on the K-means objective.
    """
    self._check_fitted()
    validated = self._check_test_data(X)
    # The helper returns (labels, score); the sign of the score is flipped
    # before it is handed back to the caller.
    labels_and_score = _compute_labels_and_score(
        validated, self.cluster_centers_)
    return -labels_and_score[1]
def test_labels_assignment_and_score():
    """Check _compute_labels_and_score against a plain NumPy reference.

    The reference assigns every sample of the module-level ``X`` to the
    nearest of a set of perturbed centers and sums the resulting minimum
    Euclidean distances; both the labels and that sum must match the
    output of ``_compute_labels_and_score`` on dense input.
    """
    # pure numpy implementation as easily auditable reference gold
    # implementation
    rng = np.random.RandomState(42)
    noisy_centers = centers + rng.normal(size=centers.shape)

    # Builtin ``int`` and ``np.inf`` are used instead of the ``np.int`` and
    # ``np.infty`` aliases, which no longer exist in recent NumPy releases.
    labels_gold = -np.ones(n_samples, dtype=int)
    mindist = np.empty(n_samples)
    mindist.fill(np.inf)

    for center_id in range(n_clusters):
        dist = np.sqrt(np.sum((X - noisy_centers[center_id]) ** 2, axis=1))
        # Strict comparison: on a tie the lowest center id keeps the sample.
        labels_gold[dist < mindist] = center_id
        mindist = np.minimum(dist, mindist)
    score_gold = mindist.sum()

    # Sanity checks on the reference itself: every sample received a label
    # and no distance is negative.
    assert_true((mindist >= 0.0).all())
    assert_true((labels_gold != -1).all())

    # perform label assignment using the dense array input
    labels_array, score_array = _compute_labels_and_score(X, noisy_centers)
    assert_array_almost_equal(score_array, score_gold)
    assert_array_equal(labels_array, labels_gold)