def test_minmax_condensed(self):
    """Check the (minimum, maximum) pair reported for a ten-value matrix."""
    sample_values = [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.0]
    matrix = CondensedDistanceMatrix(sample_values)
    # 1.0 is the smallest and 8.5 the largest entry of the sample above.
    self.assertEqual(matrix.get_minimum_and_maximum(), (1, 8.5))
def test_normalize_condensed_matrix(self):
    """Check normalize() against precomputed values.

    The matrix is normalized with its own minimum and maximum, then each
    element is compared with the reference value to two decimal places.
    """
    matrix = CondensedDistanceMatrix(
        [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.])
    reference = CondensedDistanceMatrix(
        [0.0, 0.47, 1.0, 0.83, 0.47, 0.91, 0.76, 0.35, 0.16, 0.13])

    lowest, highest = matrix.get_minimum_and_maximum()
    matrix.normalize(lowest, highest)

    normalized = matrix.get_data()
    wanted = reference.get_data()
    for position, value in enumerate(normalized):
        self.assertAlmostEqual(value, wanted[position], 2)
def test_save_condensed_matrix(self):
    """Check that save() writes the matrix values as the expected text."""
    values = [1.0, 4.5, 7.2, 6.7, 8.5, 4.5, 3.6, 7.8, 2.2, 2.0]
    # with final spaces!
    # NOTE(review): the literal is reproduced exactly as it appears here;
    # confirm its layout against what save() actually emits.
    expected_text = """1.0 4.5 7.2 6.7 8.5 4.5 3.6 7.8 2.2 2.0 """
    buffer = cStringIO.StringIO()
    CondensedDistanceMatrix(values).save(buffer)
    self.assertEqual(expected_text, buffer.getvalue())
def test_load_condensed_matrix(self):
    """Check that load_condensed_matrix() rebuilds a matrix from text.

    The loaded data must contain as many elements as the expected matrix
    and match it element by element to three decimal places.
    """
    matrix_string = """1.0 4.5 8.5 7.2 4.5 7.8 6.7 3.6 2.2 2.0 """
    expected_matrix = CondensedDistanceMatrix(
        [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.])
    # Named `stream` rather than `input` so the builtin is not shadowed.
    stream = cStringIO.StringIO(matrix_string)
    loaded_matrix = load_condensed_matrix(stream)

    expected_data = expected_matrix.get_data()
    loaded_data = loaded_matrix.get_data()
    # An element-wise loop alone would let extra loaded values go unnoticed,
    # so the sizes are compared first.
    self.assertEqual(len(expected_data), len(loaded_data))
    for expected_value, loaded_value in zip(expected_data, loaded_data):
        self.assertAlmostEqual(expected_value, loaded_value, 3)
def test_load_condensed_matrix(self):
    """Check that a matrix read from text matches the expected values.

    Both the number of elements and each value (to three decimal places)
    are verified.
    """
    matrix_string = """1.0 4.5 8.5 7.2 4.5 7.8 6.7 3.6 2.2 2.0 """
    expected_matrix = CondensedDistanceMatrix(
        [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.])
    # Avoid the name `input`: it would hide the builtin of the same name.
    text_stream = cStringIO.StringIO(matrix_string)
    loaded_matrix = load_condensed_matrix(text_stream)

    wanted = expected_matrix.get_data()
    obtained = loaded_matrix.get_data()
    # Compare sizes first; otherwise surplus loaded elements would pass
    # unnoticed because only the expected indices were ever visited.
    self.assertEqual(len(wanted), len(obtained))
    for wanted_value, obtained_value in zip(wanted, obtained):
        self.assertAlmostEqual(wanted_value, obtained_value, 3)
def test_compare_condensed_matrixes(self):
    """Check the pairs returned by compare_with() for four fixed matrices.

    Two comparisons use the default arguments and two pass extra
    positional arguments (1., 2.) and (1., 1.).
    """
    first = CondensedDistanceMatrix(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    shifted = CondensedDistanceMatrix(
        [6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4])
    ones = CondensedDistanceMatrix(
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    halves = CondensedDistanceMatrix(
        [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
         0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5])

    # Every comparison is evaluated before any assertion runs.
    outcomes = [
        first.compare_with(shifted),
        first.compare_with(ones),
        ones.compare_with(halves, 1., 2.),
        ones.compare_with(halves, 1., 1.),
    ]
    expectations = [
        (5.0, 0.0),
        (3.8421052631578947, 2.6008734948643863),
        (0., 0.),
        (0.5, 0.),
    ]
    for outcome, expectation in zip(outcomes, expectations):
        self.assertEqual(outcome, expectation)
def test_normalize_condensed_matrix(self):
    """Normalize a matrix with its own min/max and compare with references.

    Each normalized element must agree with the reference element to two
    decimal places.
    """
    raw_values = [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.]
    reference_values = [0.0, 0.47, 1.0, 0.83, 0.47,
                        0.91, 0.76, 0.35, 0.16, 0.13]
    under_test = CondensedDistanceMatrix(raw_values)
    reference = CondensedDistanceMatrix(reference_values)

    bounds = under_test.get_minimum_and_maximum()
    under_test.normalize(bounds[0], bounds[1])

    decimal_places = 2
    reference_data = reference.get_data()
    for index, normalized_value in enumerate(under_test.get_data()):
        self.assertAlmostEqual(
            normalized_value, reference_data[index], decimal_places)
def test_save_condensed_matrix(self):
    """Write a matrix to an in-memory stream and compare the text produced."""
    sink = cStringIO.StringIO()
    matrix = CondensedDistanceMatrix(
        [1.0, 4.5, 7.2, 6.7, 8.5, 4.5, 3.6, 7.8, 2.2, 2.0])
    matrix.save(sink)
    # with final spaces!
    # NOTE(review): literal kept exactly as it appears here; verify its
    # layout against the output format of save().
    wanted_text = """1.0 4.5 7.2 6.7 8.5 4.5 3.6 7.8 2.2 2.0 """
    self.assertEqual(wanted_text, sink.getvalue())
def test_equal(self):
    """Check `==` between condensed matrices.

    Two matrices built from the same values compare equal; matrices with
    different values or a different number of values do not.
    """
    base_values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    original = CondensedDistanceMatrix(base_values)
    twin = CondensedDistanceMatrix(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    different = CondensedDistanceMatrix(
        [6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4])
    shorter = CondensedDistanceMatrix([6, 7, 8, 9, 0, 1, 2, 3])

    self.assertEqual(original == twin, True)

    # Every remaining pairing must compare unequal.
    unequal_pairs = (
        (original, different),
        (original, shorter),
        (twin, different),
        (twin, shorter),
        (different, shorter),
    )
    for left, right in unequal_pairs:
        self.assertEqual(left == right, False)
def test_gen_condensed_matrix(self):
    """Check complete_to_condensed() against a matrix built from pdist().

    The complete matrix comes from cdist() of the observations with
    themselves; its condensed form must equal the matrix built directly
    from pdist() of the same observations.
    """
    points = [(1, 1), (2, 1), (4, 5), (7, 7), (5, 7)]

    ## distance matrix
    full_matrix = CompleteDistanceMatrix(distance.cdist(points, points))

    ## lower distance matrix (wo diagonal)
    reference = CondensedDistanceMatrix(distance.pdist(points))

    converted = complete_to_condensed(full_matrix)
    matrices_match = converted == reference
    self.assertEqual(True, matrices_match)
def test_everything_for_rand_distrib(self):
    """Check the cluster sizes produced for a requested distribution.

    With the distribution [60, 33, 7], the size of each returned cluster
    must lie strictly between its requested value minus one and plus one.
    """
    requested = [60, 33, 7]
    zero_distances = CondensedDistanceMatrix([0] * int((100 * (100 - 1)) / 2))
    algorithm = FakeDistributionRandomClusteringAlgorithm(zero_distances)
    result = algorithm.perform_clustering(
        kwargs={"distribution": requested})

    # Accumulate the size of each cluster under its positional index.
    sizes = {0: 0., 1: 0., 2: 0.}
    for index, cluster in enumerate(result.clusters):
        sizes[index] += cluster.get_size()

    for index in range(len(result.clusters)):
        self.assertTrue(
            requested[index] - 1 < sizes[index] < requested[index] + 1)
def test_data_sharing(self):
    """Check which mutations are visible through get_data().

    Asserts that changing the list or array a matrix was built from is not
    reflected in get_data(), while changing the object returned by
    get_data() is reflected in later get_data() calls.
    """
    source_list = [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.]
    source_array = np.array([1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.])
    array_from_list = np.array(source_list)

    from_list = CondensedDistanceMatrix(source_list)
    from_array = CondensedDistanceMatrix(source_array)
    from_converted = CondensedDistanceMatrix(array_from_list)

    # Mutate each source in turn; the matrix built from it must not follow.
    pairs = (
        (source_list, from_list),
        (source_array, from_array),
        (array_from_list, from_converted),
    )
    for source, matrix in pairs:
        source[5] = 0.
        self.assertEqual(False, source[5] == matrix.get_data()[5])

    # Writing through the object handed out by get_data() must be visible.
    contents = from_converted.get_data()
    contents[5] = 0.
    self.assertEqual(
        True,
        contents[5] == from_converted.get_data()[5]
        and from_converted.get_data()[5] == 0.)
def test_item_get(self):
    """Check [i, j] read and write access on condensed matrices.

    A condensed matrix built from six values must return, for every
    (i, j), the same value as the equivalent complete 4x4 list of lists.
    A second matrix filled entry by entry through [i, j] assignment must
    end up holding the same data items as the first.
    """
    condensed_matrix_1 = CondensedDistanceMatrix(
        [1.0, 4.5, 7.2, 8.5, 4.5, 7.8])
    condensed_matrix_2 = CythonCondensedMatrix([.0] * 6)
    complete_matrix = [[0.0, 1.0, 4.5, 7.2],
                       [1.0, 0.0, 8.5, 4.5],
                       [4.5, 8.5, 0.0, 7.8],
                       [7.2, 4.5, 7.8, 0.0]]

    row_len = condensed_matrix_1.row_length

    # Fill the second matrix through item assignment, diagonal included.
    for i in range(row_len):
        for j in range(row_len):
            condensed_matrix_2[i, j] = complete_matrix[i][j]

    ## The access for a complete and a condensed matrix is exactly the same
    for i in range(row_len):
        for j in range(row_len):
            # assertEqual replaces the deprecated alias assertEquals.
            self.assertEqual(condensed_matrix_1[i, j],
                             complete_matrix[i][j])

    ## And we can build a condensed matrix as a complete matrix
    self.assertItemsEqual(condensed_matrix_1.get_data(),
                          condensed_matrix_2.get_data())
def test_everything(self):
    """Run the random clustering 100 times and validate each result.

    Each run must yield fewer than 6 clusters, and the elements of all
    clusters together must be exactly the indices 0..row_length-1.
    """
    distances = CondensedDistanceMatrix([
        12.36931688, 5.83095189, 9.43398113, 12.52996409, 15.65247584,
        17.4642492, 9.21954446, 4.47213595, 3.16227766, 4.47213595,
        5.65685425, 5., 8.06225775, 11.18033989, 13.15294644,
        3.16227766, 6.32455532, 8.24621125, 3.16227766, 5.09901951,
        2.])
    algorithm = RandomClusteringAlgorithm(distances)

    for _ in range(100):
        result = algorithm.perform_clustering(
            kwargs={"max_num_of_clusters": 5})
        self.assertLess(len(result.clusters), 6)

        # Gather the members of every cluster into one flat, sorted list.
        members = [element
                   for cluster in result.clusters
                   for element in cluster.all_elements]
        members.sort()
        self.assertItemsEqual(members, range(distances.row_length))
def test_validate_dimensions(self):
    """Check the private dimension validation on two input sizes.

    A matrix built from ten values must validate; one built from eight
    values must not.
    """
    # (input values, expected validation result)
    cases = (
        ([1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.], True),
        ([1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6], False),
    )
    for values, expected in cases:
        matrix = CondensedDistanceMatrix(values)
        # The method is private, so it is reached by its mangled name.
        outcome = matrix._CondensedDistanceMatrix__validate_dimensions()
        self.assertEqual(expected, outcome)
def test_compare_condensed_matrixes(self):
    """Compare fixed matrices pairwise and check the returned pairs.

    compare_with() is called with and without the two extra positional
    arguments; every returned value must equal the stored expectation.
    """
    ascending = CondensedDistanceMatrix(
        [1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    offset = CondensedDistanceMatrix(
        [6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4])
    all_ones = CondensedDistanceMatrix(
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    all_halves = CondensedDistanceMatrix([
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
        0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
    ])

    # All four comparisons run before the first assertion.
    versus_offset = ascending.compare_with(offset)
    versus_ones = ascending.compare_with(all_ones)
    ones_vs_halves_1_2 = all_ones.compare_with(all_halves, 1., 2.)
    ones_vs_halves_1_1 = all_ones.compare_with(all_halves, 1., 1.)

    checks = (
        (versus_offset, (5.0, 0.0)),
        (versus_ones, (3.8421052631578947, 2.6008734948643863)),
        (ones_vs_halves_1_2, (0., 0.)),
        (ones_vs_halves_1_1, (0.5, 0.)),
    )
    for actual, wanted in checks:
        self.assertEqual(actual, wanted)
def test_data_sharing(self):
    """Check that matrices do not follow later changes to their inputs.

    After a matrix is built from a list or a numpy array, overwriting an
    element of that input must not show up in get_data(). Overwriting an
    element of the object returned by get_data() must show up in
    subsequent get_data() calls.
    """
    values = [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.]
    array_values = np.array(
        [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.])
    converted_values = np.array(values)

    matrix_a = CondensedDistanceMatrix(values)
    matrix_b = CondensedDistanceMatrix(array_values)
    matrix_c = CondensedDistanceMatrix(converted_values)

    # Plain list source.
    values[5] = 0.
    list_shared = values[5] == matrix_a.get_data()[5]
    self.assertEqual(False, list_shared)

    # numpy array source.
    array_values[5] = 0.
    array_shared = array_values[5] == matrix_b.get_data()[5]
    self.assertEqual(False, array_shared)

    # numpy array created from the list.
    converted_values[5] = 0.
    converted_shared = converted_values[5] == matrix_c.get_data()[5]
    self.assertEqual(False, converted_shared)

    # The object returned by get_data() is expected to be live.
    live_data = matrix_c.get_data()
    live_data[5] = 0.
    self.assertEqual(
        True,
        live_data[5] == matrix_c.get_data()[5]
        and matrix_c.get_data()[5] == 0.)
def test_validate_dimensions(self):
    """Exercise the private dimension check with a valid and an invalid size.

    Ten input values are expected to validate, eight are not.
    """
    ten_values = [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.]
    well_formed = CondensedDistanceMatrix(ten_values)
    # Private method, hence the name-mangled attribute.
    is_valid = well_formed._CondensedDistanceMatrix__validate_dimensions()
    self.assertEqual(True, is_valid)

    eight_values = [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6]
    malformed = CondensedDistanceMatrix(eight_values)
    is_valid = malformed._CondensedDistanceMatrix__validate_dimensions()
    self.assertEqual(False, is_valid)
def test_minmax_condensed(self):
    """Verify that get_minimum_and_maximum() returns (1, 8.5)."""
    lowest, highest = 1, 8.5
    matrix = CondensedDistanceMatrix(
        [1., 4.5, 8.5, 7.2, 4.5, 7.8, 6.7, 3.6, 2.2, 2.0])
    observed = matrix.get_minimum_and_maximum()
    self.assertEqual(observed, (lowest, highest))