def test_gstat_optics():
    """Verify that GapStatClustering rejects OPTICS as a base clusterer.

    OPTICS does not allow the number of clusters to be specified, so
    building a GapStatClustering around it is expected to raise
    AttributeError.
    """
    with pytest.raises(AttributeError):
        unsupported = OPTICS()
        gapstat.GapStatClustering(base_clusterer=unsupported)
def test_gstat_meanshift():
    """Verify that GapStatClustering rejects MeanShift as a base clusterer.

    MeanShift does not allow the number of clusters to be specified, so
    building a GapStatClustering around it is expected to raise
    AttributeError.
    """
    with pytest.raises(AttributeError):
        unsupported = MeanShift()
        gapstat.GapStatClustering(base_clusterer=unsupported)
def test_gstat_dbscan():
    """Verify that GapStatClustering rejects DBSCAN as a base clusterer.

    DBSCAN does not allow the number of clusters to be specified, so
    building a GapStatClustering around it is expected to raise
    AttributeError.
    """
    with pytest.raises(AttributeError):
        unsupported = DBSCAN()
        gapstat.GapStatClustering(base_clusterer=unsupported)
def test_gstat_affinitypropagation():
    """Verify that GapStatClustering rejects AffinityPropagation as a
    base clusterer.

    AffinityPropagation does not allow the number of clusters to be
    specified, so building a GapStatClustering around it is expected to
    raise AttributeError.
    """
    with pytest.raises(AttributeError):
        unsupported = AffinityPropagation()
        gapstat.GapStatClustering(base_clusterer=unsupported)
def _run_transform_test(base_clusterer=None):
    """Exercise GapStatClustering.transform() and
    GapStatClustering.fit_transform() with the given base clusterer.

    The expectations adapt to the base clusterer held by the
    GapStatClustering instance: when it has no ``transform`` or
    ``fit_transform`` attribute, the corresponding GapStatClustering
    call is expected to raise AttributeError and the positive checks
    for that method are skipped.
    """
    # build the test data set
    data, _, k_expected = _quad4(center=[0, 0],
                                 offset1=[0.5, 0.5],
                                 offset2=[2, 2])
    n_rows = data.shape[0]

    # wrap the base clusterer and inspect the instance the wrapper holds
    gap = gapstat.GapStatClustering(base_clusterer=base_clusterer)
    wrapped = gap.base_clusterer

    # transform() before fit(): NotFittedError when the base clusterer
    # supports transform(), AttributeError when it does not
    if hasattr(wrapped, 'transform'):
        expected_error = NotFittedError
    else:
        expected_error = AttributeError
    with pytest.raises(expected_error):
        gap.transform(data)

    # fit_transform() with max_k too small: NotFittedError when
    # supported, AttributeError when fit_transform() is unavailable
    if hasattr(wrapped, 'fit_transform'):
        gap.set_params(max_k=k_expected - 1)
        expected_error = NotFittedError
    else:
        expected_error = AttributeError
    with pytest.raises(expected_error):
        gap.fit_transform(data)

    # expected number of columns in the transformed data:
    # -- normally the number of clusters
    # -- for Birch, the number of subclusters (read at this point,
    #    before the fit_transform() call with max_k=K below)
    n_columns = (len(wrapped.subcluster_centers_)
                 if isinstance(wrapped, Birch)
                 else k_expected)

    # fit_transform() with a sufficient max_k
    if hasattr(wrapped, 'fit_transform'):
        gap.set_params(max_k=k_expected)
        projected = gap.fit_transform(data)
        _check_transformed(projected, n_rows, n_columns)

    # transform() after fitting
    if hasattr(wrapped, 'transform'):
        projected = gap.transform(data)
        _check_transformed(projected, n_rows, n_columns)
def test_gstat_featureagglomeration():
    """Verify that GapStatClustering rejects FeatureAgglomeration as a
    base clusterer.

    FeatureAgglomeration transforms the data set before clustering,
    which breaks some of the gapstat logic, so construction is expected
    to raise AttributeError. The alternative is to use
    AgglomerativeClustering and transform the data set before fitting.
    """
    with pytest.raises(AttributeError):
        unsupported = FeatureAgglomeration()
        gapstat.GapStatClustering(base_clusterer=unsupported)
def _run_fit_test(base_clusterer=None):
    """Test GapStatClustering.fit() method using the specified
    base clusterer.

    Parameters
    ----------
    base_clusterer : object, optional
        Passed unchanged as the ``base_clusterer`` argument of
        ``gapstat.GapStatClustering``.

    The test first sets ``max_k`` to ``K - 1`` (one less than the value
    returned by ``_quad4``) and expects ``fit()`` to raise
    NotFittedError, then sets ``max_k`` to ``K`` and validates the
    resulting labels with ``_check_labels``.
    """
    # construct test data
    T, _, K = _quad4(center=[0, 0], offset1=[0.5, 0.5], offset2=[2, 2])
    n_samples = T.shape[0]

    # create gapstat clusterer with base clusterer
    # (this helper never inspects the base clusterer afterwards, so the
    # wrapper's base_clusterer attribute is not read back here)
    gstat = gapstat.GapStatClustering(base_clusterer=base_clusterer)

    # test when max_k is too small
    gstat.set_params(max_k=K - 1)
    with pytest.raises(NotFittedError):
        gstat.fit(T)

    # test fit(); the object returned by fit() is the one checked
    gstat.set_params(max_k=K)
    gstat = gstat.fit(T)
    _check_labels(gstat.labels_, gstat.n_clusters_, n_samples, K)
def _run_predict_test(base_clusterer=None):
    """Exercise GapStatClustering.predict() and
    GapStatClustering.fit_predict() with the given base clusterer.

    When the base clusterer held by the GapStatClustering instance has
    no ``predict`` attribute, ``predict()`` is expected to raise
    AttributeError and the post-fit ``predict()`` checks are skipped.
    """
    # build the test data set
    data, _, k_expected = _quad4(center=[0, 0],
                                 offset1=[0.5, 0.5],
                                 offset2=[2, 2])
    n_rows = data.shape[0]

    # wrap the base clusterer and inspect the instance the wrapper holds
    gap = gapstat.GapStatClustering(base_clusterer=base_clusterer)
    wrapped = gap.base_clusterer

    # predict() before fit(): NotFittedError when the base clusterer
    # supports predict(), AttributeError when it does not
    if hasattr(wrapped, 'predict'):
        expected_error = NotFittedError
    else:
        expected_error = AttributeError
    with pytest.raises(expected_error):
        gap.predict(data)

    # fit_predict() must fail while max_k is too small
    gap.set_params(max_k=k_expected - 1)
    with pytest.raises(NotFittedError):
        gap.fit_predict(data)

    # fit_predict() with a sufficient max_k; check both the returned
    # labels and the labels stored on the estimator
    gap.set_params(max_k=k_expected)
    labels = gap.fit_predict(data)
    _check_labels(labels, gap.n_clusters_, n_rows, k_expected)
    _check_labels(gap.labels_, gap.n_clusters_, n_rows, k_expected)

    # predict() after fitting only applies to base clusterers that
    # provide predict()
    if not hasattr(wrapped, 'predict'):
        return
    labels = gap.predict(data)
    _check_labels(labels, gap.n_clusters_, n_rows, k_expected)
    _check_labels(gap.labels_, gap.n_clusters_, n_rows, k_expected)