def test_union_find(self):
    """Exercise UnionFind: add, membership, length, same() and union()."""
    set1 = UnionFind()
    set2 = UnionFind()
    set3 = UnionFind()

    # Build three independent sets; set3 shares the item 5 with set2.
    set1.add(1)
    set1.add(2)
    self.assertEqual(len(set1), 2)
    self.assertTrue(1 in set1)
    self.assertFalse(set1.has(-1))

    set2.add(3)
    set2.add(4)
    set2.add(5)
    self.assertEqual(len(set2), 3)

    set3.add(5)
    set3.add(6)
    set3.add(7)
    self.assertEqual(len(set3), 3)

    # Sets are distinct until they are explicitly unioned.
    self.assertFalse(set1.same(set2))
    set1.union(set2)
    self.assertTrue(set1.same(set2))

    set1.union(set3)
    # assertEqual, not assertTrue: assertTrue(a, b) treats b as the failure
    # message and passes for any truthy a, so it would compare nothing.
    self.assertEqual(set1.members(), set([1, 2, 3, 4, 5, 6, 7]))
    self.assertEqual(len(set1), len(set2))
def test_union_find(self):
    """Check UnionFind add/membership/len and merging via same()/union()."""
    set1 = UnionFind()
    set2 = UnionFind()
    set3 = UnionFind()

    # Populate set1 and check its size and membership queries.
    set1.add(1)
    set1.add(2)
    self.assertEqual(len(set1), 2)
    self.assertTrue(1 in set1)
    self.assertFalse(set1.has(-1))

    # Populate set2.
    set2.add(3)
    set2.add(4)
    set2.add(5)
    self.assertEqual(len(set2), 3)

    # Populate set3; it overlaps set2 on the item 5.
    set3.add(5)
    set3.add(6)
    set3.add(7)
    self.assertEqual(len(set3), 3)

    # Before the union the two sets must be reported as different.
    self.assertFalse(set1.same(set2))
    set1.union(set2)
    self.assertTrue(set1.same(set2))

    set1.union(set3)
    # These two checks must use assertEqual: with assertTrue the second
    # argument is only the failure message, so no comparison would happen.
    self.assertEqual(
        set1.members(), set([1, 2, 3, 4, 5, 6, 7]))
    self.assertEqual(len(set1), len(set2))
def cluster_hits(hits, radius1, radius2=None, samedir=False):
    """
    Cluster hits using windows

    hits    -- iterable of tuples (region1, region2, extra)
    radius1 -- radius of window in query genome
    radius2 -- radius of window in subject genome; passed unchanged to
               find_syntenic_neighbors (presumably defaults to radius1
               there -- confirm against that function)
    samedir -- whether or not to require genes in same direction

    hits must be sorted by query region species, chrom, and start

    Returns a set holding the root UnionFind block of every cluster.
    Each hit is used as a dictionary key, so hits must be hashable.
    """

    # connected components: maps each hit to the block it was placed in
    comps = {}

    for hit, syntenic in find_syntenic_neighbors(hits, radius1, radius2):

        # get block of hit, creating a singleton block on first sight
        block = comps.get(hit, None)
        if block is None:
            block = UnionFind([hit])
            comps[hit] = block

        # union block with syntenic hits
        for hit2 in syntenic:
            block2 = comps.get(hit2, None)

            # check whether hits are in the same direction
            if samedir and not samedir_hits(hit, hit2):
                # not merged with hit, but hit2 still needs a block of its
                # own so that it appears in the final result
                if hit2 not in comps:
                    comps[hit2] = UnionFind([hit2])
                continue

            if block2 is None:
                # hit2 is new: add it directly to hit's block
                comps[hit2] = block
                block.add(hit2)
            else:
                # hit2 already belongs to a block: merge the two blocks
                block2.union(block)

    # get the set of blocks; several hits share a root, so the set removes
    # the duplicates.  values() (rather than the Python-2-only
    # itervalues()) works on both Python 2 and Python 3.
    comps = set(b.root() for b in comps.values())

    return comps