def test_reuse_hierarchy_fail_different_data(self):
    """Reusing a hierarchy on a different data object must raise ValueError.

    The hierarchy is computed on one copy of ``self.d`` and then passed via
    ``reuse_hierarchy_from`` to a run on a second, separate copy.
    """
    d = self.d.copy()
    e = self.d.copy()
    c = HierarchyCluster()
    c.set_metric("euclidean")
    c.set_max_d(1.5)
    r = c.run(d)
    r.write()
    with self.assertRaises(ValueError) as ex:
        c.run(e, reuse_hierarchy_from=r)
    # assertIn shows both the expected fragment and the actual message on
    # failure, unlike assertTrue(... in ...), which only reports
    # "False is not true".
    self.assertIn("different data object", str(ex.exception))
def test_reuse_hierarchy(_data):
    """Clustering with a reused hierarchy yields the same cluster labels.

    A first run writes its result with the default arguments; a second run
    on the same data reuses that result's hierarchy and writes to the
    ``"reused"`` column. Both columns must agree element by element.
    """
    data = _data.copy()

    clusterer = HierarchyCluster()
    clusterer.set_metric("euclidean")
    clusterer.set_max_d(1.5)

    first_result = clusterer.run(data)
    first_result.write()

    reused_result = clusterer.run(data, reuse_hierarchy_from=first_result)
    reused_result.write(cluster_column="reused")

    original_labels = data.df["cluster"].tolist()
    reused_labels = data.df["reused"].tolist()
    assert original_labels == reused_labels
def test_reuse_hierarchy(self):
    """Clustering with a reused hierarchy yields the same cluster labels.

    The second run reuses the hierarchy of the first and writes to the
    ``"reused"`` column, which must equal the ``"cluster"`` column.
    """
    data = self.d.copy()

    clusterer = HierarchyCluster()
    clusterer.set_metric("euclidean")
    clusterer.set_max_d(1.5)

    first_result = clusterer.run(data)
    first_result.write()

    reused_result = clusterer.run(data, reuse_hierarchy_from=first_result)
    reused_result.write(cluster_column="reused")

    original_labels = data.df["cluster"].tolist()
    reused_labels = data.df["reused"].tolist()
    self.assertListEqual(original_labels, reused_labels)
def test_dendrogram_plot(_data, tmp_path):
    """Smoke test: a dendrogram can be rendered to a PDF path under tmp_path.

    Only checks that the calls complete without raising; the output itself
    is not inspected.
    """
    clusterer = HierarchyCluster()
    clusterer.set_metric()  # no argument: rely on the default metric
    clusterer.set_max_d(0.2)

    result = clusterer.run(_data)

    pdf_path = tmp_path / "output.pdf"
    result.dendrogram(output=str(pdf_path))
def test_hierarchy_cluster_no_max_d(_data):
    """Running without calling ``set_max_d`` first raises a ValueError.

    The error message is expected to mention ``set_max_d``.
    """
    data = _data.copy()
    clusterer = HierarchyCluster()

    expect_error = pytest.raises(ValueError, match=".*set_max_d.*")
    with expect_error:
        clusterer.run(data)
def test_reuse_hierarchy_fail_different_cluster(_data):
    """A hierarchy cannot be reused by a different HierarchyCluster object.

    Two identically configured clusterers are created; the result of the
    first is passed as ``reuse_hierarchy_from`` to the second, which must
    raise a ValueError.
    """
    data = _data.copy()

    producer = HierarchyCluster()
    consumer = HierarchyCluster()
    # Same configuration for both, so only object identity differs.
    for clusterer in (producer, consumer):
        clusterer.set_metric("euclidean")
        clusterer.set_max_d(1.5)

    produced = producer.run(data)
    produced.write()

    expect_error = pytest.raises(
        ValueError, match=".*different HierarchyCluster object.*"
    )
    with expect_error:
        consumer.run(data, reuse_hierarchy_from=produced)
def test_cluster(_data):
    """Check the number of clusters obtained for two ``max_d`` thresholds.

    The same clusterer is run twice on one data copy: with ``max_d=0.75``
    (written with default arguments) and with ``max_d=1.5`` (written to the
    ``"cluster15"`` column).
    """
    data = _data.copy()

    clusterer = HierarchyCluster()
    clusterer.set_metric("euclidean")

    clusterer.set_max_d(0.75)
    tight_result = clusterer.run(data)
    tight_result.write()

    clusterer.set_max_d(1.5)
    loose_result = clusterer.run(data)
    loose_result.write(cluster_column="cluster15")

    # The smallest distance between any two of our distributions is 1, so
    # with max_d=0.75 every distribution ends up in its own cluster.
    tight_clusters = data.clusters()
    assert len(tight_clusters) == data.n

    # Many distribution pairs are equidistant, so which clusters form with
    # max_d=1.5 depends on the algorithm's implementation. This is a bit
    # unfortunate, but 6 is what has been observed so far.
    loose_clusters = data.clusters(cluster_column="cluster15")
    assert len(loose_clusters) == 6
def test_dendrogram_plot(self):
    """Smoke test: ``dendrogram()`` runs on a clustering result without raising."""
    clusterer = HierarchyCluster()
    clusterer.set_metric()  # no argument: rely on the default metric
    clusterer.set_max_d(0.2)

    result = clusterer.run(self.d)
    result.dendrogram()
def test_hierarchy_cluster_no_max_d(self):
    """Running without calling ``set_max_d`` first must raise ValueError.

    The error message is expected to mention ``set_max_d``.
    """
    d = self.d.copy()
    c = HierarchyCluster()
    with self.assertRaises(ValueError) as e:
        c.run(d)
    # assertIn shows both the expected fragment and the actual message on
    # failure, unlike assertTrue(... in ...), which only reports
    # "False is not true".
    self.assertIn("set_max_d", str(e.exception))