def test_correct_number_of_clusters():
    """Check the cluster count and the fitted attributes of OPTICS.

    Data with a known number of clusters is generated, OPTICS is fitted on
    it, and the test verifies that the number of distinct non-noise labels
    equals that number and that each fitted attribute is a 1-D array of the
    expected dtype kind with one entry per sample.
    """
    # in 'auto' mode
    expected_clusters = 3
    data = generate_clustered_data(n_clusters=expected_clusters)
    n_samples = len(data)

    # Parameters chosen specifically for this task.
    model = OPTICS(max_eps=5.0 * 6.0, min_samples=4, xi=.1)
    model.fit(data)

    # Count distinct labels, dropping the noise label (-1) if it is present.
    found_labels = set(model.labels_)
    found_labels.discard(-1)
    assert len(found_labels) == expected_clusters

    # Each fitted attribute paired with its expected dtype kind
    # ('i' = signed integer, 'f' = floating point).
    expected_kinds = (
        ("labels_", 'i'),
        ("reachability_", 'f'),
        ("core_distances_", 'f'),
        ("ordering_", 'i'),
    )
    for attr_name, dtype_kind in expected_kinds:
        values = getattr(model, attr_name)
        assert values.shape == (n_samples,)
        assert values.dtype.kind == dtype_kind

    # The ordering must contain every sample index exactly once.
    assert set(model.ordering_) == set(range(n_samples))
def test_bad_reachability():
    """Check that OPTICS warns when all reachability values are infinite.

    The estimator is fitted with ``max_eps=0.015`` on blobs generated with
    ``cluster_std=0.4``; the fit is expected to emit a ``UserWarning``
    telling the user to set a larger ``max_eps``.
    """
    # Imported locally so this block does not depend on the file's import
    # section, which is not visible from here.
    import re

    msg = "All reachability values are inf. Set a larger max_eps."
    centers = [[1, 1], [-1, -1], [1, -1]]
    # Only the samples are used; the true labels are discarded.
    X, _ = make_blobs(n_samples=750, centers=centers,
                      cluster_std=0.4, random_state=0)

    # ``match`` is interpreted as a regular expression, so the literal
    # message is escaped; otherwise each '.' would match any character.
    with pytest.warns(UserWarning, match=re.escape(msg)):
        clust = OPTICS(max_eps=5.0 * 0.003, min_samples=10, eps=0.015)
        clust.fit(X)
def test_min_cluster_size(min_cluster_size):
    """Check that OPTICS respects ``min_cluster_size``.

    No non-noise cluster may be smaller than ``min_cluster_size``, and
    passing the same threshold as a fraction of the sample count must give
    identical labels.

    ``min_cluster_size`` is supplied by pytest; the parametrization or
    fixture that provides it is not visible in this part of the file.
    """
    # Keep every other sample only, to reduce the running time.
    subsample = X[::2]
    n_subsample = subsample.shape[0]

    by_count = OPTICS(min_samples=9,
                      min_cluster_size=min_cluster_size).fit(subsample)

    # Per-cluster sizes, with noise points (label -1) excluded.
    labels = by_count.labels_
    sizes = np.bincount(labels[labels != -1])
    if sizes.size > 0:
        assert sizes.min() >= min_cluster_size

    # check behaviour is the same when min_cluster_size is a fraction
    fraction = min_cluster_size / n_subsample
    by_fraction = OPTICS(min_samples=9, min_cluster_size=fraction)
    by_fraction.fit(subsample)
    assert_array_equal(by_count.labels_, by_fraction.labels_)
def test_wrong_cluster_method():
    """Check that fitting with an unknown ``cluster_method`` raises."""
    expected_msg = "cluster_method should be one of "
    estimator = OPTICS(cluster_method='superfancy')
    # The ValueError is expected from ``fit``, not from the constructor.
    with pytest.raises(ValueError, match=expected_msg):
        estimator.fit(X)
def test_min_cluster_size_invalid2():
    """Check that ``min_cluster_size`` above the sample count raises."""
    # One more than the number of samples in X.
    too_large = len(X) + 1
    expected_msg = "must be no greater than the "
    estimator = OPTICS(min_cluster_size=too_large)
    with pytest.raises(ValueError, match=expected_msg):
        estimator.fit(X)
def test_min_cluster_size_invalid(min_cluster_size):
    """Check that an invalid ``min_cluster_size`` makes ``fit`` raise.

    ``min_cluster_size`` is supplied by pytest; the parametrization or
    fixture that provides the invalid values is not visible in this part
    of the file.
    """
    expected_msg = "must be a positive integer or a "
    estimator = OPTICS(min_cluster_size=min_cluster_size)
    with pytest.raises(ValueError, match=expected_msg):
        estimator.fit(X)