Пример #1
0
 def setUp(self):
     # Fixture: one dummy cluster per product ('not_atpks', 'transatpks'),
     # each holding seven dummy CDS features. Every CDS carries a
     # DummyNRPSQualfier whose domain names come from the mapping returned
     # by self.gen_domain_names(), keyed by "nrpspksdomains_" + locus tag.
     self.genes = []
     self.clusters = []
     domain_names = self.gen_domain_names()
     # six single-letter locus tags 'a'..'f', then a seventh gene named "all"
     locus_tags = [chr(ord('a') + offset) for offset in range(6)] + ["all"]
     for product in ['not_atpks', 'transatpks']:
         cluster = helpers.DummyCluster(1, 2, products=[product])
         assert cluster.products == (product, )
         for locus_tag in locus_tags:
             gene = helpers.DummyCDS(1, 2, locus_tag=locus_tag)
             gene.product = product
             gene.nrps_pks = DummyNRPSQualfier()
             domain_key = "nrpspksdomains_" + locus_tag
             gene.nrps_pks.domain_names = domain_names[domain_key]
             gene.cluster = cluster
             cluster.add_cds(gene)
             self.genes.append(gene)
         self.clusters.append(cluster)
     # prediction labels made available to the tests
     self.predictions = [
         'redmxmal', 'ccmal', 'ohemal', 'ohmxmal', 'ohmmal', 'ccmmal',
         'emal', 'redmmal', 'mmal', 'ccmxmal', 'mxmal', 'redemal', 'ohmal',
         'mal', 'ccemal'
     ]
Пример #2
0
 def test_orphaned_supercluster_number(self):
     # A supercluster that was never added to the record has no number:
     # asking the record for one must raise ValueError.
     record = Record("A" * 1000)
     orphan = SuperCluster(SuperCluster.kinds.SINGLE,
                           [helpers.DummyCluster(0, 1000)])
     expected_message = "SuperCluster not contained in record"
     with self.assertRaisesRegex(ValueError, expected_message):
         # only reached (and printed) if the lookup unexpectedly succeeds
         print(record.get_supercluster_number(orphan))
Пример #3
0
 def test_cluster_numbering(self):
     # Clusters are added out of order; once sorted, each cluster must
     # report a number equal to its 1-based position.
     record = Record(Seq("A"*1000))
     locations = [(50, 100), (10, 40), (700, 1000), (0, 9)]
     for start, end in locations:
         record.add_cluster(helpers.DummyCluster(start, end))
     ordered = sorted(record.get_clusters())
     for expected_number, cluster in enumerate(ordered, start=1):
         assert cluster.get_cluster_number() == expected_number
Пример #4
0
    def test_classification_with_colon(self):
        # The SMCOG id and its description are stored in one string joined
        # by ':', so a description that itself contains ':' must still be
        # handled correctly.
        # test gene is AQF52_5530 from CP013129.1
        translation = (
            "MDTHQREEDPVAARRDRTHYLYLAVIGAVLLGIAVGFLAPGVAVELKPLGTGFVN"
            "LIKMMISPIIFCTIVLGVGSVRKAAKVGAVGGLALGYFLVMSTVALAIGLLVGNL"
            "LEPGSGLHLTKEIAEAGAKQAEGGGESTPDFLLGIIPTTFVSAFTEGEVLQTLLV"
            "ALLAGFALQAMGAAGEPVLRGIGHIQRLVFRILGMIMWVAPVGAFGAIAAVVGAT"
            "GAAALKSLAVIMIGFYLTCGLFVFVVLGAVLRLVAGINIWTLLRYLGREFLLILS"
            "TSSSESALPRLIAKMEHLGVSKPVVGITVPTGYSFNLDGTAIYLTMASLFVAEAM"
            "GDPLSIGEQISLLVFMIIASKGAAGVTGAGLATLAGGLQSHRPELVDGVGLIVGI"
            "DRFMSEARALTNFAGNAVATVLVGTWTKEIDKARVTEVLAGNIPFDEKTLVDDHA"
            "PVPVPDQRAEGGEEKARAGV")
        cds = helpers.DummyCDS(0, len(translation))
        cds.translation = translation

        results = smcogs.classify("test", [cds], get_config())
        best_hit = results.best_hits[cds.get_name()]
        assert best_hit.hit_id == "SMCOG1212:sodium:dicarboxylate symporter"

        record = helpers.DummyRecord(seq=translation)
        record.add_cds_feature(cds)
        record.add_cluster(helpers.DummyCluster(0, len(translation)))

        # if multiple colons in the hit id are mishandled, this call crashes
        results.add_to_record(record)

        smcog_functions = cds.gene_functions.get_by_tool("smcogs")
        assert len(smcog_functions) == 1
        expected_prefix = (
            "transport (smcogs) SMCOG1212:sodium:dicarboxylate symporter"
            " (Score: 416; E-value: 2.3e-126)")
        assert str(smcog_functions[0]).startswith(expected_prefix)
Пример #5
0
    def test_orphaned_cluster_number(self):
        # A cluster that was never added to the record has no number; both
        # the record-side and the cluster-side lookups must raise ValueError.
        record = Record(Seq("A" * 1000))
        orphan = helpers.DummyCluster(0, 1000)
        expected_message = "Cluster not contained in record"

        # lookup through the record
        with self.assertRaisesRegex(ValueError, expected_message):
            print(record.get_cluster_number(orphan))

        # lookup through the cluster itself
        with self.assertRaisesRegex(ValueError, expected_message):
            print(orphan.get_cluster_number())
Пример #6
0
 def test_cluster_numbering(self):
     # Add one dummy cluster per (start, end) pair in self.pairs, then check
     # that the record's clusters are numbered from 1 and that looking up a
     # number returns the matching cluster from the sorted list of those added.
     added = []
     for start, end in self.pairs:
         cluster = helpers.DummyCluster(start, end)
         self.record.add_cluster(cluster)
         added.append(cluster)
     added.sort()
     for number, cluster in enumerate(self.record.get_clusters(), start=1):
         assert cluster.get_cluster_number() == number
         assert self.record.get_cluster(number) is added[number - 1]
Пример #7
0
 def test_cds_cluster_linkage(self):
     # With four CDS features already in the record, each cluster added
     # below must end up with exactly two CDS children, and every child
     # must overlap with that cluster.
     record = Record("A"*200)
     cds_locations = [(50, 100), (10, 90), (0, 9), (150, 200)]
     cluster_locations = [(10, 120), (5, 110), (10, 160), (45, 200)]
     for start, end in cds_locations:
         record.add_cds_feature(helpers.DummyCDS(start, end))
     for start, end in cluster_locations:
         # start from a record without clusters for each location
         record.clear_clusters()
         cluster = helpers.DummyCluster(start, end)
         record.add_cluster(cluster)
         assert len(cluster.cds_children) == 2
         for child in cluster.cds_children:
             assert child.overlaps_with(cluster)
0
 def test_supercluster_numbering(self):
     # Add one single-cluster supercluster per entry in self.locations, then
     # check that the record's superclusters are numbered from 1 and that
     # looking up a number returns the matching one from the sorted list.
     added = []
     for location in self.locations:
         core = helpers.DummyCluster(location.start, location.end)
         supercluster = SuperCluster(SuperCluster.kinds.SINGLE, [core])
         self.record.add_supercluster(supercluster)
         added.append(supercluster)
     added.sort()
     numbered = enumerate(self.record.get_superclusters(), start=1)
     for number, supercluster in numbered:
         assert supercluster.get_supercluster_number() == number
         assert self.record.get_supercluster(number) is added[number - 1]
Пример #9
0
 def test_classification_with_colon(self):
     # The SMCOG id and its description are stored in one string joined by
     # ':', so a description that itself contains ':' must still be handled
     # correctly when results are added to the record.
     cds = helpers.DummyCDS(locus_tag="test")
     record = helpers.DummyRecord(features=[cds], seq="A" * 100)
     record.add_cluster(helpers.DummyCluster(0, 100))

     results = SMCOGResults(record.id)
     hit = HMMResult(
         "SMCOG1212:sodium:dicarboxylate_symporter", 0, 100, 2.3e-126, 416)
     results.best_hits[cds.get_name()] = hit
     results.add_to_record(record)

     smcog_functions = cds.gene_functions.get_by_tool("smcogs")
     assert len(smcog_functions) == 1
     expected_prefix = (
         "transport (smcogs) SMCOG1212:sodium:dicarboxylate_symporter"
         " (Score: 416; E-value: 2.3e-126)")
     assert str(smcog_functions[0]).startswith(expected_prefix)
Пример #10
0
    def test_cds_removal(self):
        # Removing a CDS from the record must also drop it from the
        # children of the cluster that contains it.
        record = Record(Seq("A" * 1000))
        cluster = helpers.DummyCluster(0, 1000)
        record.add_cluster(cluster)

        doomed = helpers.DummyCDS(0, 100, locus_tag="A")
        survivor = helpers.DummyCDS(200, 300, locus_tag="B")
        for cds in (doomed, survivor):
            record.add_cds_feature(cds)

        # both features are registered with the record and the cluster
        assert len(record.get_cds_features()) == 2
        assert len(cluster.cds_children) == 2

        record.remove_cds_feature(doomed)

        # only the CDS with locus tag "B" is left in both places
        remaining = record.get_cds_features()
        assert len(remaining) == 1
        assert len(cluster.cds_children) == 1
        assert remaining[0] is list(cluster.cds_children)[0]
        assert remaining[0].locus_tag == "B"