def test_partial_fit():
    """Check that two partial_fit calls agree with one full fit.

    The precomputed blocks are the boolean mask ``y <= 1``, so the first
    batch (``y <= 1``) carries only the block value ``True`` and the second
    batch (``y > 1``) only ``False``. The number of fitted per-block
    clusterers is asserted to be 1 after the first batch and 2 after the
    second. The incrementally fitted model's predictions on the full data
    are then compared with the labels of a model fitted in one call; the
    paired F-score between them must be exactly 1.0.
    """
    first_batch = y <= 1
    second_batch = y > 1
    blocks = first_batch

    def make_clusterer():
        # Both models under comparison are configured identically.
        return BlockClustering(
            blocking="precomputed",
            base_estimator=MiniBatchKMeans(n_clusters=2),
        )

    # Incremental model: feed the data one batch at a time.
    incremental = make_clusterer()
    incremental.partial_fit(X[first_batch], blocks=blocks[first_batch])
    assert_equal(len(incremental.clusterers_), 1)
    incremental.partial_fit(X[second_batch], blocks=blocks[second_batch])
    assert_equal(len(incremental.clusterers_), 2)

    # Reference model: a single fit over the whole dataset.
    reference = make_clusterer()
    reference.fit(X, blocks=blocks)

    incremental_labels = incremental.predict(X, blocks=blocks)
    reference_labels = reference.labels_
    assert_equal(paired_f_score(incremental_labels, reference_labels), 1.0)
# NOTE(review): X is read below but is not assigned anywhere in this span;
# presumably it is bound earlier in the script -- confirm.
truth = data["y"]

# Block clustering with fixed threshold
block_clusterer = BlockClustering(
    blocking=block_last_name_first_initial,
    base_estimator=ScipyHierarchicalClustering(
        threshold=0.5,
        affinity=affinity,
        method="complete",
    ),
    verbose=3,
    n_jobs=-1,
)
block_clusterer.fit(X)
labels = block_clusterer.labels_

# Print clusters: one line per label, listing the distinct
# (name, affiliation) pairs that received that label.
for cluster in np.unique(labels):
    entries = {
        (name, affiliation) for name, affiliation in X[labels == cluster]
    }
    print("Cluster #%d = %s" % (cluster, entries))
print()

# Statistics: compare the computed clustering against the ground truth.
print("Number of blocks =", len(block_clusterer.clusterers_))
print("True number of clusters", len(np.unique(truth)))
print("Number of computed clusters", len(np.unique(labels)))
print("Paired F-score =", paired_f_score(truth, labels))
# Inputs: the samples to cluster and their ground-truth labels.
X = data["X"]
truth = data["y"]

# Block clustering with fixed threshold
base_estimator = ScipyHierarchicalClustering(
    threshold=0.5,
    affinity=affinity,
    method="complete",
)
block_clusterer = BlockClustering(
    blocking=block_last_name_first_initial,
    base_estimator=base_estimator,
    verbose=3,
    n_jobs=-1,
)
block_clusterer.fit(X)
labels = block_clusterer.labels_

# Print clusters: for each label, show the distinct (name, affiliation)
# pairs of the rows assigned to it.
for cluster in np.unique(labels):
    members = X[labels == cluster]
    entries = {(name, affiliation) for name, affiliation in members}
    print("Cluster #%d = %s" % (cluster, entries))
print()

# Statistics: block count, cluster counts, and agreement with the truth.
print("Number of blocks =", len(block_clusterer.clusterers_))
print("True number of clusters", len(np.unique(truth)))
print("Number of computed clusters", len(np.unique(labels)))
print("Paired F-score =", paired_f_score(truth, labels))