class ClusterFinderTest(unittest.TestCase):
    """Tests for ClusterFinder's probabilistic area detection.

    The fixture record carries one CDS and one PFAM domain per entry: a
    stretch of features between positions 10 and 140, a second stretch
    between 1010 and 1120, and a single feature at 500-505 whose PFAM
    probability is None.
    """

    def setUp(self):
        """Build an isolated config and fill a dummy record with features."""
        cli_args = [
            "--cf-create-clusters",
            "--cf-mean-threshold", "0.6",
            "--cf-min-cds", "5",
            "--cf-min-pfams", "5",
        ]
        self.config = build_config(cli_args, modules=[clusterfinder],
                                   isolated=True)
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))
        # (start, end, probability, PFAM identifier) per CDS/domain pair
        feature_specs = [
            (10, 20, 0.1, 'PF77777'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'PF77776'),
            (500, 505, None, 'PF77775'),
            (1010, 1020, 0.1, 'PF77774'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in feature_specs:
            location = FeatureLocation(start, end, strand=1)
            cds = CDSFeature(location, locus_tag=str(start), translation="A")
            self.record.add_cds_feature(cds)

            domain = DummyPFAMDomain(
                location=location,
                protein_start=start + 1,
                protein_end=end - 1,
                identifier=pfam_id,
            )
            domain.domain_id = "pfam_%d" % start
            domain.probability = probability
            self.record.add_pfam_domain(domain)

    def tearDown(self):
        """Discard the config built in setUp."""
        destroy_config()

    def test_options(self):
        """Thresholds below 0 or above 1 make check_options return one item."""
        bad_thresholds = (-1., -.01, -0.0001, 1.0001, 2.)
        for threshold in bad_thresholds:
            update_config({"cf_threshold": threshold})
            problems = clusterfinder.check_options(self.config)
            assert len(problems) == 1

    def test_check_prereqs(self):
        """check_prereqs returns an empty list for the test config."""
        missing = clusterfinder.check_prereqs(self.config)
        self.assertEqual([], missing)

    def test_find_nr_cds(self):
        """find_nr_cds returns the expected span and CDS count per query."""
        find_nr_cds = clusterfinder.probabilistic.find_nr_cds

        # a span left of every feature comes back unchanged with no CDS
        left = (0, 5)
        newpos, num = find_nr_cds(left, self.record)
        self.assertEqual(left, newpos)
        self.assertEqual(0, num)

        # (query span, expected span, expected CDS count)
        cases = (
            ((150, 160), (150, 160), 0),  # right of the first stretch
            ((35, 115), (30, 120), 7),    # middle of the first stretch
            ((501, 504), (500, 505), 1),  # inside the lone 500-505 CDS
        )
        for query, expected_span, expected_count in cases:
            newpos, count = find_nr_cds(query, self.record)
            assert newpos == expected_span
            assert count == expected_count

    def test_find_probabilistic_clusters(self):
        """Two clusters are found, spanning 30-120 and 1030-1120."""
        found = clusterfinder.find_probabilistic_clusters(self.record,
                                                          self.config)
        assert len(found) == 2
        expected_spans = ((30, 120), (1030, 1120))
        for index, (start, end) in enumerate(expected_spans):
            assert found[index].location.start == start
            assert found[index].location.end == end

    def test_no_overlaps(self):
        """generate_results yields two areas that match the record's subregions."""
        results = clusterfinder.generate_results(self.record, self.config)
        areas = self.record.get_subregions()
        assert len(results.areas) == 2
        assert len(areas) == 2
        assert list(areas) == results.areas

        expected_spans = ((30, 120), (1030, 1120))
        for index, (start, end) in enumerate(expected_spans):
            area = areas[index]
            assert area.location.start == start
            assert area.location.end == end
            self.assertAlmostEqual(0.6429, area.probability, places=4)

    def test_full_loop(self):
        """Results survive a JSON round trip with locations and probabilities."""
        original = clusterfinder.run_on_record(self.record, None, self.config)
        area_count = len(original.areas)

        serialised = json.dumps(original.to_json())
        rebuilt = clusterfinder.regenerate_previous_results(
            json.loads(serialised), self.record, self.config)

        assert len(rebuilt.areas) == area_count
        for before, after in zip(original.areas, rebuilt.areas):
            assert before.location == after.location
            self.assertAlmostEqual(before.probability, after.probability,
                                   places=6)
class ClusterFinderTest(unittest.TestCase):
    """Tests for ClusterFinder's probabilistic cluster-border detection.

    The fixture record carries one CDS and one PFAM domain per entry: a
    stretch of features between positions 10 and 140, a second stretch
    between 1010 and 1120, and a single feature at 500-505 whose PFAM
    probability is None.
    """

    def setUp(self):
        """Build an isolated config and fill a dummy record with features."""
        cli_args = [
            "--cf-create-clusters",
            "--cf-mean-threshold", "0.6",
            "--cf-min-cds", "5",
            "--cf-min-pfams", "5",
        ]
        self.config = build_config(cli_args, modules=[clusterfinder],
                                   isolated=True)
        update_config({"enabled_cluster_types": []})

        self.record = DummyRecord(seq=Seq("A" * 2000))
        # (start, end, probability, PFAM identifier) per CDS/domain pair
        feature_specs = [
            (10, 20, 0.1, 'FAKE007'),
            (30, 40, 0.3, 'PF00106'),
            (50, 60, 0.4, 'PF00107'),
            (60, 70, 0.7, 'PF00109'),
            (70, 80, 0.98, 'PF08484'),
            (90, 100, 0.8, 'PF02401'),
            (100, 110, 0.32, 'PF04369'),
            (110, 120, 1.0, 'PF00128'),
            (130, 140, 0.2, 'FAKE234'),
            (500, 505, None, 'FAKE505'),
            (1010, 1020, 0.1, 'FAKE007'),
            (1030, 1040, 0.3, 'PF00106'),
            (1050, 1060, 0.4, 'PF00107'),
            (1060, 1070, 0.7, 'PF00109'),
            (1070, 1080, 0.98, 'PF08484'),
            (1090, 1100, 0.8, 'PF02401'),
            (1100, 1110, 0.32, 'PF04369'),
            (1110, 1120, 1.0, 'PF00128'),
        ]
        for start, end, probability, pfam_id in feature_specs:
            location = FeatureLocation(start, end)
            cds = CDSFeature(location, locus_tag=str(start))
            self.record.add_cds_feature(cds)

            domain = PFAMDomain(location, "dummy_description")
            domain.db_xref.append(pfam_id)
            domain.probability = probability
            self.record.add_pfam_domain(domain)

    def tearDown(self):
        """Discard the config built in setUp."""
        destroy_config()

    def test_options(self):
        """Thresholds below 0 or above 1 make check_options return one item."""
        bad_thresholds = (-1., -.01, -0.0001, 1.0001, 2.)
        for threshold in bad_thresholds:
            update_config({"cf_threshold": threshold})
            problems = clusterfinder.check_options(self.config)
            assert len(problems) == 1

    def test_check_prereqs(self):
        """check_prereqs returns an empty list."""
        missing = clusterfinder.check_prereqs()
        self.assertEqual([], missing)

    def test_find_nr_cds(self):
        """find_nr_cds returns the expected span and CDS count per query."""
        find_nr_cds = clusterfinder.probabilistic.find_nr_cds

        # a span left of every feature comes back unchanged with no CDS
        left = (0, 5)
        newpos, num = find_nr_cds(left, self.record)
        self.assertEqual(left, newpos)
        self.assertEqual(0, num)

        # (query span, expected span, expected CDS count); the expected
        # spans deliberately keep the tuple/list types the test compares to
        cases = (
            ((150, 160), (150, 160), 0),  # right of the first stretch
            ((35, 115), [30, 120], 7),    # middle of the first stretch
            ((501, 504), [500, 505], 1),  # inside the lone 500-505 CDS
        )
        for query, expected_span, expected_count in cases:
            newpos, count = find_nr_cds(query, self.record)
            assert newpos == expected_span
            assert count == expected_count

    def test_find_probabilistic_clusters(self):
        """Two clusters are found, spanning 30-120 and 1030-1120."""
        found = clusterfinder.find_probabilistic_clusters(self.record,
                                                          self.config)
        assert len(found) == 2
        expected_spans = ((30, 120), (1030, 1120))
        for index, (start, end) in enumerate(expected_spans):
            assert found[index].location.start == start
            assert found[index].location.end == end

    def test_no_overlaps(self):
        """generate_results yields two borders matching those on the record."""
        results = clusterfinder.generate_results(self.record, self.config)
        self.assertEqual(2, len(results.borders))
        assert list(self.record.get_cluster_borders()) == results.borders

        first, second = self.record.get_cluster_borders()
        expected_spans = ((30, 120), (1030, 1120))
        for border, (start, end) in zip((first, second), expected_spans):
            assert border.location.start == start
            assert border.location.end == end
            assert not border.high_priority_product
            self.assertAlmostEqual(0.6429, border.probability, places=4)

    def test_merges(self):
        """Borders added by another tool combine with ClusterFinder's into two clusters."""
        clusterfinder.generate_results(self.record, self.config)
        assert len(self.record.get_cluster_borders()) == 2

        # extra borders from a second tool; the product is the start as text
        extra_spans = [(10, 40), (1040, 1050), (110, 400)]
        for start, end in extra_spans:
            border = ClusterBorder(FeatureLocation(start, end), "testtool",
                                   product=str(start))
            self.record.add_cluster_border(border)

        assert not self.record.get_clusters()
        self.record.create_clusters_from_borders()
        assert len(self.record.get_clusters()) == 2

        first, second = self.record.get_clusters()
        # (cluster, expected start, expected end, expected products)
        expectations = (
            (first, 10, 400, ("10", "110")),
            (second, 1030, 1120, ("1040", )),
        )
        for cluster, start, end, products in expectations:
            assert cluster.location.start == start
            assert cluster.location.end == end
            assert cluster.products == products