def test_shc_semi_supervised_scoring_data_affinity():
    """Test semi-supervised learning for SHC when scoring_data='affinity'."""
    # Case 1: fit on a raw feature matrix. The affinity matrix handed to the
    # scorer is computed internally (euclidean_distances), so it is square
    # and its shape differs from that of X1.
    X1, y1 = generate_data(supervised=True, affinity=False)

    def _scoring1(X_affinity, labels_true, labels_pred):
        assert X_affinity.shape[0] == X_affinity.shape[1]
        assert X_affinity.shape != X1.shape
        return b3_f_score(labels_true, labels_pred)

    clusterer = ScipyHierarchicalClustering(scoring=_scoring1,
                                            scoring_data="affinity",
                                            affinity=euclidean_distances)
    clusterer.fit(X1, y1)
    assert_array_equal([25, 25, 25, 25], np.bincount(clusterer.labels_))

    # Case 2: fit on a precomputed affinity matrix. The scorer should receive
    # a matrix with exactly the shape of the one passed in.
    X2, y2 = generate_data(supervised=True, affinity=True)

    def _scoring2(X_affinity, labels_true, labels_pred):
        assert X_affinity.shape[0] == X_affinity.shape[1]
        assert X_affinity.shape == X2.shape
        return b3_f_score(labels_true, labels_pred)

    clusterer = ScipyHierarchicalClustering(scoring=_scoring2,
                                            scoring_data="affinity",
                                            affinity="precomputed")
    clusterer.fit(X2, y2)
    assert_array_equal([25, 25, 25, 25], np.bincount(clusterer.labels_))
def hcluster(X, attrs):
    """Run hierarchical clustering on X and return a scatterplot of the labels.

    ``attrs`` provides 'kNumber' (number of clusters, coercible to int),
    'distance' (linkage method) and 'affinity'.

    A dendrogram dict could alternatively be returned, e.g.:
    {'children': [
        {'children': [], 'name': 2, 'value': 150.0039243544126},
        {'children': [
            {'children': [], 'name': 1, 'value': 2.509279181210386},
            {'children': [
                {'children': [], 'name': 0, 'value': 2.4987419269136737},
                {'children': [], 'name': 3, 'value': 2.4987419269136737}
            ], 'name': 4, 'value': 4.997483853827347}
        ], 'name': 5, 'value': 5.018558362420772}
    ], 'name': 6, 'value': 300.0078487088252}
    """
    k = int(attrs['kNumber'])
    model = ScipyHierarchicalClustering(method=attrs['distance'],
                                        affinity=attrs['affinity'],
                                        n_clusters=k)
    model.fit(X)
    # Alternative dendrogram output, currently disabled:
    # Z = model.linkage_
    # return HClusterTree(Z).to_dict()
    save_clusterer(model)
    return scatterplot(X, model.labels_, k)
def hcluster(X, attrs):
    """Run hierarchical clustering on X and return a scatterplot of the labels.

    ``attrs`` provides 'kNumber' (number of clusters, coercible to int),
    'distance' (linkage method) and 'affinity'.

    A dendrogram dict could alternatively be returned, e.g.:
    {'children': [
        {'children': [], 'name': 2, 'value': 150.0039243544126},
        {'children': [
            {'children': [], 'name': 1, 'value': 2.509279181210386},
            {'children': [
                {'children': [], 'name': 0, 'value': 2.4987419269136737},
                {'children': [], 'name': 3, 'value': 2.4987419269136737}
            ], 'name': 4, 'value': 4.997483853827347}
        ], 'name': 5, 'value': 5.018558362420772}
    ], 'name': 6, 'value': 300.0078487088252}
    """
    k = int(attrs['kNumber'])
    model = ScipyHierarchicalClustering(method=attrs['distance'],
                                        affinity=attrs['affinity'],
                                        n_clusters=k)
    model.fit(X)
    # Alternative dendrogram output, currently disabled:
    # Z = model.linkage_
    # return HClusterTree(Z).to_dict()
    # Persisting the fitted model is disabled in this variant:
    # save_clusterer(model)
    return scatterplot(X, model.labels_, k)
def test_shc_semi_supervised_scoring_data_none():
    """Test semi-supervised learning for SHC when scoring_data is None."""
    X, y = generate_data(supervised=True, affinity=False)

    # With scoring_data left at its default, the scorer receives only the
    # label arrays.
    def _scoring(labels_true, labels_pred):
        return b3_f_score(labels_true, labels_pred)

    # We should find all 4 clusters
    clusterer = ScipyHierarchicalClustering(scoring=_scoring)
    clusterer.fit(X, y)
    assert_array_equal([25, 25, 25, 25], np.bincount(clusterer.labels_))
def test_shc_semi_supervised_scoring_data_raw():
    """Test semi-supervised learning for SHC when scoring_data='raw'."""
    X, y = generate_data(supervised=True, affinity=False)

    # With scoring_data='raw', the scorer is handed the original feature
    # matrix, so its shape must match X exactly.
    def _scoring(X_raw, labels_true, labels_pred):
        assert X_raw.shape == X.shape
        return b3_f_score(labels_true, labels_pred)

    clusterer = ScipyHierarchicalClustering(scoring=_scoring,
                                            scoring_data="raw")
    clusterer.fit(X, y)
    assert_array_equal([25, 25, 25, 25], np.bincount(clusterer.labels_))