def test_group_by_distance_all(self):
    # Calls group_by_distance on the lt10 fixture with a second argument of
    # 10 and no further arguments, and checks the full returned mapping of
    # structure string -> list of tuples.
    # (The meaning of the value 10 is defined by group_by_distance, which is
    # not visible here - presumably a distance/score cutoff; confirm.)
    expected = {
        '(((...)))...(((...)))': [(9, 10)],
        '(((...)))': [(1, 2), (3, 4), (11, 12), (5, 6)],
        '.....((((.......))))...': [(15, 16), (7, 8)],
        '............': [(13, 14)],
    }
    observed = group_by_distance(self.testclusterslt10, 10)
    self.assertEqual(observed, expected)
def test_group_by_distance_lt10_specific_nogroup(self):
    # Calls group_by_distance on the lt10 fixture with a second argument of
    # 10 and a one-element list of structures as the third argument, and
    # expects every structure to remain under its own key.
    # NOTE(review): the tuple (15, 16) expected by test_group_by_distance_all
    # does not appear anywhere in this expected value - confirm against the
    # fixture that this is intended.
    target_structs = ['.....((((.......))))...']
    expected = {
        "(((...)))": [(1, 2), (3, 4)],
        "((.....))": [(5, 6)],
        ".....((((.......))))...": [(7, 8)],
        "(((...)))...(((...)))": [(9, 10)],
        "((((....))))": [(11, 12)],
        '............': [(13, 14)],
    }
    observed = group_by_distance(self.testclusterslt10, 10, target_structs)
    self.assertEqual(observed, expected)
def test_group_by_distance_lt10_specific_group(self):
    # Calls group_by_distance on the lt10 fixture with a second argument of
    # 15 and specstructs limited to '(((...)))', then prints the result.
    # NOTE(review): this test makes no assertion, so it can only fail if the
    # call itself raises. TODO: replace the print with an assertEqual once
    # the expected grouping for this input has been confirmed.
    obs = group_by_distance(self.testclusterslt10, 15,
                            specstructs=['(((...)))'])
    # Single-argument parenthesised form: prints the same text under the
    # Python 2 print statement and the Python 3 print function, whereas the
    # bare `print obs` form is a syntax error on Python 3.
    print(obs)
files = [] hold = {} pool = Pool(processes=args.c) #run the pool over all shape groups to get final grouped structgroups fout = open(otufolder + "shapesizes.txt", 'w') groupnum = 1 for shapegroup in groups_shape.keys(): #write out each group to file for use in subprocess groupinfo = {struct: structgroups[struct] for struct in groups_shape[shapegroup]} fout.write(shapegroup + "\t" + str(len(groupinfo)) + "\n") pool.apply_async(func=group_by_distance, args=(groupinfo, structscore), callback=hold.update) structgroups.clear() del structgroups stime = time() hold.update(group_by_distance(groupinfo, structscore, None, None, args.nr)) print len(groupinfo), "clusters:", str((time()-stime)/60), "min" fout.close() #memory saving wipe of structgroups, groups_shape, and groupinfo groups_shape.clear() del groups_shape groupinfo.clear() del groupinfo pool.close() pool.join() print "Grouped ("+str((time()-stime)/60)+" min)" structgroups = dict(hold) del hold #sort all structure sequences by count, highest to lowest for struct in structgroups: