def test_cluster(self):
    """Check how many distinct clusters ``hcluster`` yields per fixture page.

    For each fixture page, the first candidate returned by
    ``MDR.list_candidates`` is turned into a similarity matrix and passed
    to ``MDR.hcluster``. The number of distinct values in the result is
    then compared against the expected count: 1 for ``htmlpage0`` and
    3 for ``htmlpage1``.
    """
    mdr = MDR()

    # NOTE(review): 'utf8' is presumably the page encoding -- confirm
    # against MDR.list_candidates.
    page = get_page('htmlpage0')
    candidates, _ = mdr.list_candidates(page, 'utf8')
    m = mdr.calculate_similarity_matrix(candidates[0])
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(1, len(set(mdr.hcluster(m))))

    page1 = get_page('htmlpage1')
    candidates, _ = mdr.list_candidates(page1, 'utf8')
    m = mdr.calculate_similarity_matrix(candidates[0])
    # The first element is different from the rest.
    self.assertEqual(3, len(set(mdr.hcluster(m))))