def split(self, k, assignment):
    """
    Split cluster `k` by seeding a new cluster with its lowest-scoring member.

    Every member of cluster `k` is scored with `self.test` against the
    cluster's tree.  The member with the minimum score is moved into a new
    cluster labelled `max(assignment.partition_vector) + 1`.  The members
    are then re-scored with `self.score_sample` and redistributed through
    `self.make_new_assignment`, with all of them eligible for reassignment
    (`nreassign=len(members)`).

    Progress is reported on stdout.

    :param k: key of the cluster to split, as used to index the results of
        `self.get_clusters` and `self.get_cluster_trees`.
    :param assignment: current partition; must expose `partition_vector`.
    :return: whatever `self.make_new_assignment` returns for the seeded
        partition.
    :raises ValueError: if cluster `k` has no members.
    """
    print(assignment)
    members = self.get_clusters(assignment)[k]
    tree = self.get_cluster_trees(assignment)[k]

    print('Calculating alignment scores...')
    alignment_scores = {}
    for i in members:
        r = self.Collection.records[i]
        alignment_scores[i] = self.test(r, tree)

    if not alignment_scores:
        # min() would raise ValueError anyway; say why instead.
        raise ValueError('Cannot split cluster {0!r}: it has no members'
                         .format(k))

    # Member index with the lowest score; ties resolve to the first one
    # met in dict iteration order, as with min() over the items.
    seed = min(alignment_scores, key=alignment_scores.get)
    print('Splitting on {0}.'.format(seed))

    # Give the seed a label that no existing cluster uses.
    new_assignment = list(assignment.partition_vector)
    new_assignment[seed] = max(assignment.partition_vector) + 1
    print('New Partition: {0}'.format(new_assignment))

    print('Assigning to new partition...')
    new_assignment = Partition(new_assignment)
    scores = self.score_sample(members, new_assignment)
    assignment = self.make_new_assignment(members, scores, new_assignment,
                                          nreassign=len(members))
    print('Returning: {0}'.format(assignment))
    return assignment
def get_partition(clusters):
    """
    Build a Partition from a grouping of member indices.

    `clusters` is either a dict (label -> members) or an indexable
    collection in which the position of each group is its label.  Each
    member is used as an index into the resulting partition vector, whose
    length is the total number of members across all groups.
    """
    if isinstance(clusters, dict):
        labels = clusters
    else:
        labels = range(len(clusters))

    # Resolve each label to its member group once, keeping label order.
    groups = [(label, clusters[label]) for label in labels]

    total = sum(len(group) for _, group in groups)
    vector = [0] * total
    for label, group in groups:
        for member in group:
            vector[member] = label

    return Partition(tuple(vector))
def make_new_assignment(self, sample, scores, assignment, nreassign=1,
                        choose='max'):
    """
    Make a new partition by reassigning records between clusters.

    `scores` is indexed as a 2-D array: row `i` belongs to record
    `sample[i]`, and the column holding the row's largest value (plus one)
    becomes that record's new cluster label.  Rows are normalised by their
    row sum before being ranked.

    :param sample: record indices, aligned with the rows of `scores`.
    :param scores: 2-D numpy array of per-record, per-cluster scores.
    :param assignment: current partition; must expose `partition_vector`.
    :param nreassign: number of records to reassign.  0 reassigns nothing.
    :param choose: 'max' reassigns the `nreassign` rows with the largest
        normalised row maximum; 'min' reassigns the `nreassign` rows with
        the smallest normalised row minimum.
    :return: a new Partition; `assignment` itself is not modified.
    :raises ValueError: if `choose` is neither 'max' nor 'min'.
    """
    if choose not in ('max', 'min'):
        # Without this check `reassignments` would be left unbound below.
        raise ValueError(
            "choose must be 'max' or 'min', got {0!r}".format(choose))

    new_clusters = scores.argmax(axis=1)
    M = scores / scores.sum(axis=1)[:, np.newaxis]

    if choose == 'max':
        order = M.max(axis=1).argsort()
        # order[-0:] would select every row, so handle zero explicitly.
        reassignments = order[-nreassign:] if nreassign else order[:0]
    else:
        reassignments = M.min(axis=1).argsort()[:nreassign]

    new_assignment = list(assignment.partition_vector)
    for i in reassignments:
        # Cluster numbers are in range [1, x]; new_clusters is in range
        # [0, x-1], hence the +1.
        new_assignment[sample[i]] = new_clusters[i] + 1

    return Partition(tuple(new_assignment))
def get_true_partition(self):
    """
    Store the partition implied by `self.class_list` in `self.true_partition`.

    The entry at position `k` of `self.class_list` is a count: that many
    consecutive elements receive the label `k + 1`.  Nothing is returned.
    """
    labels = []
    for label, size in enumerate(self.class_list, 1):
        labels += [label] * size
    self.true_partition = Partition(labels)
return dict(zip(nC, avg_spread_norm + np.array([len(p) for p in partitions]))) def pen_val_debug(self, *partitions): partitions = [p for p in partitions if 1 < len(p) < self.n_obs] nc = np.array([len(p) for p in partitions]) print('clusters per partition: {}'.format([str(x) for x in nc])) avg_spread = np.array([self._average_spread(partition) for partition in partitions]) print('average_spread_list: {}'.format(list(avg_spread))) avg_spread_norm = self._normalised_average_spread(*partitions) print('normalised: {}'.format(avg_spread_norm)) return dict(zip(nc, avg_spread_norm + nc)) if __name__ == '__main__': plist = [Partition((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)), Partition((1, 2, 3, 4, 5, 6, 5, 7, 8, 9, 10, 11)), Partition((1, 2, 3, 4, 5, 6, 5, 7, 8, 9, 10, 9)), Partition((1, 2, 1, 3, 4, 5, 4, 6, 7, 8, 9, 8)), Partition((1, 2, 1, 3, 4, 5, 4, 4, 6, 7, 8, 7)), Partition((1, 2, 1, 3, 4, 5, 4, 4, 6, 7, 7, 7)), Partition((1, 2, 1, 3, 4, 5, 4, 4, 6, 6, 6, 6)), Partition((1, 1, 1, 2, 3, 4, 3, 3, 5, 5, 5, 5)), Partition((1, 1, 1, 2, 3, 3, 3, 3, 4, 4, 4, 4)), Partition((1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3)), Partition((1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2)), Partition((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1))] dm = np.array( [[0., 0.352, 0.23, 0.713, 0.426, 0.653, 0.481, 0.554, 1.533, 1.549, 1.505, 1.46], [0.352, 0., 0.249, 0.772, 0.625, 0.909, 0.668, 0.725, 1.613, 1.623, 1.568, 1.523], [0.23, 0.249, 0., 0.811, 0.417, 0.751, 0.456, 0.52, 1.489, 1.501, 1.446, 1.396],
def test_equal(self):
    # Two label vectors that group the same elements together under
    # different label names must compare equal.
    block = 5
    first = Partition([0] * block + [2] * block + [1] * block)
    second = Partition([2] * block + [1] * block + [0] * block)
    self.assertEqual(first, second)
def test_minimal(self):
    # Only the partition in which all nine elements share one label is
    # expected to report itself as minimal.
    n_items = 9
    single_cluster = Partition([1] * n_items)
    all_distinct = Partition(list(range(n_items)))

    self.assertTrue(single_cluster.is_minimal())
    self.assertFalse(all_distinct.is_minimal())
    self.assertFalse(self.partition.is_minimal())
def setUp(self):
    # Shared fixture: 14 elements spread over four string-labelled clusters.
    a, b, c, d = 'a', 'b', 'c', 'd'
    labels = [a, d, a, a, b, a, b, c, c, d, d, d, d, d]
    self.partition = Partition(labels)
def random_partition(self, nclusters):
    """
    Return a Partition with one random label per item of `self.Collection`.

    Labels are drawn with `np.random.randint(nclusters, ...)`, i.e. integers
    from 0 up to but not including `nclusters`.
    """
    n_items = len(self.Collection)
    labels = np.random.randint(nclusters, size=n_items)
    return Partition(tuple(labels))
def merge(self, assignment, label1, label2):
    """
    Return a new Partition in which every `label1` entry of
    `assignment.partition_vector` is replaced by `label2`.

    All other entries are copied unchanged; `assignment` is not modified.
    """
    relabelled = []
    for label in assignment.partition_vector:
        if label == label1:
            relabelled.append(label2)
        else:
            relabelled.append(label)
    return Partition(tuple(relabelled))