示例#1
0
def create_fake_record():
    """Build a dummy record named "test" populated with nine CDS/gene pairs.

    The CDS features are tagged "gene1" through "gene9" in the order of the
    coordinates below, and a gene with the same locus tag and location is
    added alongside each one. The fourth and sixth CDS also receive a CORE
    gene function annotation.
    """
    seq_record = helpers.DummyRecord(
        seq=Seq("acgtacgtacgtacgtacgtacgtacgtacgtacgtacgtacgtacgta" * 196))
    seq_record.name = "test"

    # (start, end, strand) of every feature, in insertion order
    coordinates = (
        (100, 300, 1),
        (101, 299, -1),
        (250, 350, 1),
        (500, 1000, 1),
        (1111, 1500, -1),
        (2000, 2200, -1),
        (2999, 4000, 1),
        (4321, 5678, 1),
        (6660, 9000, -1),
    )
    locations = [FeatureLocation(start, end, strand=strand)
                 for start, end, strand in coordinates]

    # 1-based positions of the features that get the CORE annotation
    core_positions = (4, 6)

    for position, location in enumerate(locations, start=1):
        tag = "gene" + str(position)
        cds = helpers.DummyCDS(locus_tag=tag)
        cds.location = location
        seq_record.add_cds_feature(cds)
        seq_record.add_gene(secmet.Gene(location, locus_tag=tag))
        if position in core_positions:
            cds.gene_functions.add(secmet.qualifiers.GeneFunction.CORE,
                                   "testtool", "dummy", "product")

    return seq_record
示例#2
0
 def setUp(self):
     """Prepare two regions (one per product) with seven dummy CDSs each.

     Every CDS is given a dummy NRPS/PKS qualifier whose domain names come
     from gen_domain_names(), keyed by "nrpspksdomains_" plus the locus tag.
     All CDSs are collected in self.genes, the regions in self.regions, and
     the list of prediction names is stored in self.predictions.
     """
     self.genes = []
     self.regions = []
     names_by_key = self.gen_domain_names()
     # locus tags "a" through "f", then the special "all" entry
     locus_tags = [chr(ord('a') + offset) for offset in range(6)] + ["all"]
     for product in ('not_atpks', 'transatpks'):
         protocluster = helpers.DummyProtocluster(1, 2, product=product)
         candidate = helpers.DummyCandidateCluster([protocluster])
         region = Region(candidate_clusters=[candidate])
         self.regions.append(region)
         for tag in locus_tags:
             feature = helpers.DummyCDS(1, 2, locus_tag=tag)
             feature.product = product
             feature.nrps_pks = DummyNRPSQualfier()
             feature.nrps_pks.domain_names = names_by_key["nrpspksdomains_" + tag]
             feature.cluster = protocluster
             protocluster.add_cds(feature)
             self.genes.append(feature)
             region.add_cds(feature)
             # adding to the region must link the CDS back to that region
             assert feature.region == region
     self.predictions = [
         'redmxmal',
         'ccmal',
         'ohemal',
         'ohmxmal',
         'ohmmal',
         'ccmmal',
         'emal',
         'redmmal',
         'mmal',
         'ccmxmal',
         'mxmal',
         'redemal',
         'ohmal',
         'mal',
         'ccemal',
     ]
示例#3
0
    def test_classification_with_colon(self):
        """Check that SMCOG descriptions containing ':' survive classification.

        The SMCOG id and description are stored in one string separated by
        ':', so a description that itself contains ':' must be handled
        properly. The test gene is AQF52_5530 from CP013129.1.
        """
        expected_hit_id = "SMCOG1212:sodium:dicarboxylate symporter"
        expected_prefix = (
            "transport (smcogs) SMCOG1212:sodium:dicarboxylate symporter"
            " (Score: 416; E-value: 2.3e-126)")
        translation = (
            "MDTHQREEDPVAARRDRTHYLYLAVIGAVLLGIAVGFLAPGVAVELKPLGTGFVN"
            "LIKMMISPIIFCTIVLGVGSVRKAAKVGAVGGLALGYFLVMSTVALAIGLLVGNL"
            "LEPGSGLHLTKEIAEAGAKQAEGGGESTPDFLLGIIPTTFVSAFTEGEVLQTLLV"
            "ALLAGFALQAMGAAGEPVLRGIGHIQRLVFRILGMIMWVAPVGAFGAIAAVVGAT"
            "GAAALKSLAVIMIGFYLTCGLFVFVVLGAVLRLVAGINIWTLLRYLGREFLLILS"
            "TSSSESALPRLIAKMEHLGVSKPVVGITVPTGYSFNLDGTAIYLTMASLFVAEAM"
            "GDPLSIGEQISLLVFMIIASKGAAGVTGAGLATLAGGLQSHRPELVDGVGLIVGI"
            "DRFMSEARALTNFAGNAVATVLVGTWTKEIDKARVTEVLAGNIPFDEKTLVDDHA"
            "PVPVPDQRAEGGEEKARAGV")

        cds = helpers.DummyCDS(0, len(translation))
        cds.translation = translation
        results = smcogs.classify("test", [cds], get_config())
        best_hit = results.best_hits[cds.get_name()]
        assert best_hit.hit_id == expected_hit_id

        record = helpers.DummyRecord(seq=translation)
        record.add_cds_feature(cds)
        protocluster = helpers.DummyProtocluster(0, len(translation))
        record.add_protocluster(protocluster)

        # mishandling the extra colons in the hit id would make this call crash
        results.add_to_record(record)

        smcog_functions = cds.gene_functions.get_by_tool("smcogs")
        assert len(smcog_functions) == 1
        assert str(smcog_functions[0]).startswith(expected_prefix)
示例#4
0
 def setUp(self):
     """Prepare two clusters (one per product) with seven dummy CDSs each.

     Every CDS is given a dummy NRPS/PKS qualifier whose domain names come
     from gen_domain_names(), keyed by "nrpspksdomains_" plus the locus tag.
     All CDSs are collected in self.genes, the clusters in self.clusters,
     and the list of prediction names is stored in self.predictions.
     """
     self.genes = []
     self.clusters = []
     names_by_key = self.gen_domain_names()
     # locus tags "a" through "f", then the special "all" entry
     locus_tags = [chr(ord('a') + offset) for offset in range(6)] + ["all"]
     for product in ('not_atpks', 'transatpks'):
         dummy_cluster = helpers.DummyCluster(1, 2, products=[product])
         assert dummy_cluster.products == (product, )
         for tag in locus_tags:
             feature = helpers.DummyCDS(1, 2, locus_tag=tag)
             feature.product = product
             feature.nrps_pks = DummyNRPSQualfier()
             feature.nrps_pks.domain_names = names_by_key["nrpspksdomains_" + tag]
             feature.cluster = dummy_cluster
             dummy_cluster.add_cds(feature)
             self.genes.append(feature)
         # the cluster is only registered once all of its CDSs are attached
         self.clusters.append(dummy_cluster)
     self.predictions = [
         'redmxmal',
         'ccmal',
         'ohemal',
         'ohmxmal',
         'ohmmal',
         'ccmmal',
         'emal',
         'redmmal',
         'mmal',
         'ccmxmal',
         'mxmal',
         'redemal',
         'ohmal',
         'mal',
         'ccemal',
     ]
示例#5
0
    def test_cds_removal(self):
        """Removing a CDS from a record must also remove it from its cluster."""
        record = Record(Seq("A" * 1000))
        cluster = helpers.DummyCluster(0, 1000)
        record.add_cluster(cluster)

        # two non-overlapping CDSs, both inside the cluster
        first_cds, second_cds = (
            helpers.DummyCDS(start, end, locus_tag=tag)
            for start, end, tag in ((0, 100, "A"), (200, 300, "B"))
        )
        for feature in (first_cds, second_cds):
            record.add_cds_feature(feature)

        assert len(record.get_cds_features()) == 2
        assert len(cluster.cds_children) == 2

        record.remove_cds_feature(first_cds)

        # only "B" may remain, and the cluster must refer to that same object
        assert len(record.get_cds_features()) == 1
        assert len(cluster.cds_children) == 1
        survivor = record.get_cds_features()[0]
        assert survivor is list(cluster.cds_children)[0]
        assert record.get_cds_features()[0].locus_tag == "B"
示例#6
0
 def test_cds_cluster_linkage(self):
     """Each cluster added to the record picks up exactly two of the CDSs."""
     record = Record("A"*200)
     cds_spans = [(50, 100), (10, 90), (0, 9), (150, 200)]
     cluster_spans = [(10, 120), (5, 110), (10, 160), (45, 200)]

     for cds_start, cds_end in cds_spans:
         record.add_cds_feature(helpers.DummyCDS(cds_start, cds_end))

     for cluster_start, cluster_end in cluster_spans:
         # start from a clean slate so only the new cluster is present
         record.clear_clusters()
         cluster = helpers.DummyCluster(cluster_start, cluster_end)
         record.add_cluster(cluster)
         children = cluster.cds_children
         assert len(children) == 2
         for child in children:
             assert child.overlaps_with(cluster)
示例#7
0
    def test_add_when_regenerating(self):
        """Rebuilding results from JSON must add domains and motifs to the record."""
        record = helpers.DummyRecord(seq="A"*3800)
        record.id = 'Y16952.3.trimmed'
        cds_specs = (
            (0, 1800, "two_domains"),
            (1900, 4000, "one_domain"),
            (4100, 4400, "no_hits"),
        )
        for cds_start, cds_end, tag in cds_specs:
            record.add_cds_feature(helpers.DummyCDS(start=cds_start, end=cds_end, locus_tag=tag))

        # individual HMM hits, as they appear in the serialised results
        amp_binding_hit = {'bitscore': 360.7, 'query_end': 428, 'evalue': 2.1e-110,
                           'hit_id': 'AMP-binding', 'query_start': 35}
        pcp_hit = {'bitscore': 66.0, 'query_end': 569, 'evalue': 6.3e-21,
                   'hit_id': 'PCP', 'query_start': 504}
        ech_hit = {'bitscore': 76.9, 'query_end': 382, 'evalue': 3.9e-24,
                   'hit_id': 'ECH', 'query_start': 170}
        first_motif = {'query_start': 18, 'evalue': 4.7e-05, 'query_end': 30,
                       'bitscore': 16.1, 'hit_id': 'C1_dual_004-017'}
        second_motif = {'query_start': 38, 'evalue': 1.4e-19, 'query_end': 78,
                        'bitscore': 62.4, 'hit_id': 'C2_DCL_024-062'}

        two_domain_json = {
            'domain_hmms': [amp_binding_hit, pcp_hit],
            'motif_hmms': [],
            'type': 'NRPS',
        }
        one_domain_json = {
            'domain_hmms': [ech_hit],
            'motif_hmms': [first_motif, second_motif],
            'type': 'other',
        }

        # the "no_hits" CDS is deliberately absent from the CDS results
        serialised = {
            'cds_results': {
                'two_domains': two_domain_json,
                'one_domain': one_domain_json,
            },
            'record_id': record.id,
            'schema_version': 1,
        }

        # nothing has been added to the record before regeneration
        assert not record.get_antismash_domains()
        assert not record.get_cds_motifs()

        results_class = nrps_pks_domains.domain_identification.NRPSPKSDomains
        results = results_class.from_json(serialised, record)
        assert len(results.cds_results) == 2
        assert len(record.get_cds_motifs()) == 2
        assert len(record.get_antismash_domains()) == 3

        two_domains = record.get_cds_by_name("two_domains")
        assert two_domains.nrps_pks.type == "NRPS"
        assert len(two_domains.nrps_pks.domains) == 2
        assert not two_domains.motifs

        one_domain = record.get_cds_by_name("one_domain")
        assert one_domain.nrps_pks.type == "other"
        assert len(one_domain.nrps_pks.domains) == 1
        assert len(one_domain.motifs) == 2

        no_hits = record.get_cds_by_name("no_hits")
        assert not no_hits.nrps_pks
示例#8
0
 def test_classification_with_colon(self):
     """Check that SMCOG descriptions containing ':' are handled properly.

     The SMCOG id and description are stored in one string separated by
     ':', so a description with its own ':' must not break annotation.
     """
     cds = helpers.DummyCDS(locus_tag="test")
     record = helpers.DummyRecord(features=[cds], seq="A" * 100)
     record.add_cluster(helpers.DummyCluster(0, 100))

     results = SMCOGResults(record.id)
     hit = HMMResult(
         "SMCOG1212:sodium:dicarboxylate_symporter", 0, 100, 2.3e-126, 416)
     results.best_hits[cds.get_name()] = hit
     results.add_to_record(record)

     expected_prefix = (
         "transport (smcogs) SMCOG1212:sodium:dicarboxylate_symporter"
         " (Score: 416; E-value: 2.3e-126)")
     smcog_functions = cds.gene_functions.get_by_tool("smcogs")
     assert len(smcog_functions) == 1
     assert str(smcog_functions[0]).startswith(expected_prefix)
示例#9
0
    def test_add_when_regenerating(self):
        """Rebuilding results from JSON must add domains and motifs to the
        record, while modules stay only on the results object."""
        results_class = nrps_pks_domains.domain_identification.NRPSPKSDomains

        record = helpers.DummyRecord(seq="A" * 3800)
        record.id = 'Y16952.3.trimmed'
        cds_specs = (
            (0, 1800, "two_domains"),
            (1900, 4000, "one_domain"),
            (4100, 4400, "no_hits"),
        )
        for cds_start, cds_end, tag in cds_specs:
            record.add_cds_feature(
                helpers.DummyCDS(start=cds_start, end=cds_end, locus_tag=tag))

        # these two hits are used both as domain hits and as module components
        a_domain = {'bitscore': 360.7, 'query_end': 428, 'evalue': 2.1e-110,
                    'hit_id': 'AMP-binding', 'query_start': 35}
        pcp = {'bitscore': 66.0, 'query_end': 569, 'evalue': 6.3e-21,
               'hit_id': 'PCP', 'query_start': 504}
        ech = {'bitscore': 76.9, 'query_end': 382, 'evalue': 3.9e-24,
               'hit_id': 'ECH', 'query_start': 170}
        first_motif = {'query_start': 18, 'evalue': 4.7e-05, 'query_end': 30,
                       'bitscore': 16.1, 'hit_id': 'C1_dual_004-017'}
        second_motif = {'query_start': 38, 'evalue': 1.4e-19, 'query_end': 78,
                        'bitscore': 62.4, 'hit_id': 'C2_DCL_024-062'}

        single_module = {"components": [{"domain": a_domain}, {"domain": pcp}]}
        two_domain_json = {
            'domain_hmms': [a_domain, pcp],
            'motif_hmms': [],
            'modules': [single_module],
            "ks_subtypes": [],
            'type': 'NRPS',
        }
        one_domain_json = {
            'domain_hmms': [ech],
            'motif_hmms': [first_motif, second_motif],
            "modules": [],  # arbitrarily none
            "ks_subtypes": [],
            'type': 'other',
        }

        # the "no_hits" CDS is deliberately absent from the CDS results
        serialised = {
            'cds_results': {
                'two_domains': two_domain_json,
                'one_domain': one_domain_json,
            },
            'record_id': record.id,
            'modules': [],
            'schema_version': results_class.schema_version,
        }

        # nothing has been added to the record before regeneration
        assert not record.get_antismash_domains()
        assert not record.get_cds_motifs()

        results = results_class.from_json(serialised, record)
        assert len(results.cds_results) == 2
        assert len(record.get_cds_motifs()) == 2
        assert len(record.get_antismash_domains()) == 3
        tool_domains = record.get_antismash_domains_by_tool("nrps_pks_domains")
        assert len(tool_domains) == 3

        two_domains = record.get_cds_by_name("two_domains")
        assert two_domains.nrps_pks.type == "NRPS"
        assert len(two_domains.nrps_pks.domains) == 2
        assert not two_domains.motifs
        modules = results.cds_results[two_domains].modules
        assert modules
        first_module = modules[0]
        assert first_module._loader._domain.hit_id == "AMP-binding"
        assert first_module._carrier_protein._domain.hit_id == "PCP"
        assert not two_domains.modules  # added in add_to_record

        one_domain = record.get_cds_by_name("one_domain")
        assert one_domain.nrps_pks.type == "other"
        assert len(one_domain.nrps_pks.domains) == 1
        assert len(one_domain.motifs) == 2
        assert not results.cds_results[one_domain].modules
        assert not one_domain.modules

        no_hits = record.get_cds_by_name("no_hits")
        assert not no_hits.nrps_pks