def test_annotate_projection(self):
    """
    Run the DBSCAN-based annotation and check the Orange version bound.

    The returned values are not inspected; the only assertion is that the
    second dot-separated component of ``Orange.__version__`` is at most 22.
    """
    options = {
        "clustering_algorithm": DBSCAN,
        "labels_per_cluster": 2,
        "eps": 3,
    }
    # Unpacking into three names still requires a three-item result.
    _clusters, _clusters_meta, _eps = annotate_projection(
        self.annotations, self.data, **options
    )

    version_parts = Orange.__version__.split(".")
    second_component = int(version_parts[1])
    self.assertLessEqual(second_component, 22)
def test_example_not_clustered(self):
    """
    Annotate after moving the last data row to (23, 23), then check the
    Orange version bound.

    The returned values are not inspected; the only assertion is that the
    second dot-separated component of ``Orange.__version__`` is at most 22.
    """
    # Overwrite the last row in place before clustering.
    self.data[-1] = [23, 23]

    options = {
        "clustering_algorithm": DBSCAN,
        "eps": 2,
        "min_samples": 3,
    }
    # Unpacking into three names still requires a three-item result.
    _clusters, _clusters_meta, _eps = annotate_projection(
        self.annotations, self.data, **options
    )

    version_parts = Orange.__version__.split(".")
    second_component = int(version_parts[1])
    self.assertLessEqual(second_component, 22)
def test_other_clustering(self):
    """
    Annotate with KMeans instead of the DBSCAN used by the other tests.

    With ``n_clusters=2`` and ``labels_per_cluster=2`` the test expects two
    distinct values in ``clusters.X``, three items of metadata for each of
    "C1" and "C2", and two entries in the first metadata item of each.
    """
    clusters, clusters_meta, _ = annotate_projection(
        self.annotations,
        self.data,
        clustering_algorithm=KMeans,
        n_clusters=2,
        labels_per_cluster=2,
    )

    distinct_values = set(clusters.X.flatten())
    self.assertEqual(2, len(distinct_values))

    cluster_names = ("C1", "C2")
    for name in cluster_names:
        self.assertEqual(3, len(clusters_meta[name]))
    for name in cluster_names:
        self.assertEqual(2, len(clusters_meta[name][0]))
def compute_clusters(embedding: Table, result: Result, state: TaskState):
    """
    Fill ``result.clusters`` for ``embedding`` and report it through ``state``.

    When both ``result.scores.table`` and ``embedding`` are truthy,
    ``annotate_projection`` is run (passing the stored epsilon as ``eps`` if
    one is set) and items 0, 1 and 2 of its return value are stored as the
    cluster table, groups and epsilon.  Otherwise the cluster table and
    groups are reset to ``None`` and the stored epsilon is left untouched.

    In both cases ``("clusters", result)`` is passed to
    ``state.set_partial_result``.
    """
    if result.scores.table and embedding:
        state.set_status("Finding clusters...")

        # Only override the default eps when a value is already stored.
        stored_eps = result.clusters.epsilon
        extra = {} if stored_eps is None else {"eps": stored_eps}

        annotated = annotate_projection(
            result.scores.table, embedding, **extra
        )
        result.clusters.table = annotated[0]
        result.clusters.groups = annotated[1]
        result.clusters.epsilon = annotated[2]
    else:
        result.clusters.table = None
        result.clusters.groups = None

    state.set_partial_result(("clusters", result))
def test_one_label(self):
    """
    Check that annotation works when only a single label is available.

    One cluster then ends up without an assigned label, which must not
    break anything either.
    """
    # Five rows scored 0.9 followed by seven rows scored 0.
    scores = np.array([[0.9]] * 5 + [[0]] * 7)
    single_label_domain = Domain([ContinuousVariable("a")])
    ann = Table(single_label_domain, scores)

    clusters, clusters_meta, _eps = annotate_projection(
        ann, self.data, eps=2
    )

    self.assertGreater(len(clusters), 0)
    self.assertGreater(len(clusters_meta), 0)
    # Each of the three metadata items of "C1" has to be non-empty.
    for position in range(3):
        self.assertGreater(len(clusters_meta["C1"][position]), 0)
def test_example_not_clustered(self):
    """
    Check the annotation when one example is expected to stay unclustered.

    The last data row is moved to (23, 23); the test expects it to be
    rendered as "?" while the other rows fall into "C2" (five rows) and
    "C1" (six rows).
    """
    # Overwrite the last row in place before clustering.
    self.data[-1] = [23, 23]

    clusters, clusters_meta, _eps = annotate_projection(
        self.annotations,
        self.data,
        clustering_algorithm=DBSCAN,
        eps=2,
        min_samples=3,
    )

    render = clusters.domain.attributes[0].repr_val
    rendered = [render(value) for value in clusters.X[:, 0]]
    self.assertListEqual(rendered, ["C2"] * 5 + ["C1"] * 6 + ["?"])

    self.assertEqual(len(clusters_meta), 2)
    self.assertEqual(len(clusters_meta["C1"]), 3)
    self.assertEqual(len(clusters_meta["C2"]), 3)

    # First metadata item of "C1": (label, value) pairs in expected order.
    c1_labels = clusters_meta["C1"][0]
    expected_pairs = (('c', 0.5), ('b', 0.5))
    for position, (label, value) in enumerate(expected_pairs):
        self.assertEqual(c1_labels[position][0], label)
        self.assertAlmostEqual(c1_labels[position][1], value, 5)
def test_annotate_projection(self):
    """
    Check the DBSCAN-based annotation of the fixture data.

    With ``eps=3.0`` and ``labels_per_cluster=2`` the test expects the rows
    to be rendered as five "C2" followed by seven "C1", three items of
    metadata per cluster, and for "C1" the (label, value) pairs
    ('b', 4/7) and ('c', 3/7).  The returned ``eps`` must be exactly of
    type ``float``.
    """
    clusters, clusters_meta, eps = annotate_projection(
        self.annotations,
        self.data,
        clustering_algorithm=DBSCAN,
        labels_per_cluster=2,
        eps=3.0,
    )

    render = clusters.domain.attributes[0].repr_val
    rendered = [render(value) for value in clusters.X[:, 0]]
    self.assertListEqual(rendered, ["C2"] * 5 + ["C1"] * 7)

    self.assertEqual(len(clusters_meta), 2)
    self.assertEqual(len(clusters_meta["C1"]), 3)
    self.assertEqual(len(clusters_meta["C2"]), 3)

    # First metadata item of "C1": (label, value) pairs in expected order.
    c1_labels = clusters_meta["C1"][0]
    expected_pairs = (('b', 4 / 7), ('c', 3 / 7))
    for position, (label, value) in enumerate(expected_pairs):
        self.assertEqual(c1_labels[position][0], label)
        self.assertAlmostEqual(c1_labels[position][1], value, 5)

    # Second item has two entries; third is an array with two columns.
    self.assertEqual(2, len(clusters_meta["C1"][1]))
    self.assertEqual(2, clusters_meta["C1"][2].shape[1])

    # Exact type comparison: a float subclass would not be accepted.
    self.assertEqual(float, type(eps))
def test_empty_annotations(self):
    """
    Check annotation with an annotation table that has no columns.

    The annotation table has one row per data row and zero attributes; the
    test expects no cluster metadata to be produced.
    """
    row_count = len(self.data)
    no_columns = np.empty((row_count, 0))
    ann = Table(Domain([]), no_columns)

    clusters, clusters_meta, _eps = annotate_projection(ann, self.data)

    # NOTE(review): len() is never negative, so this only verifies that
    # `clusters` supports len() - confirm whether a stricter check was meant.
    self.assertGreaterEqual(len(clusters), 0)
    self.assertEqual(len(clusters_meta), 0)