def create_fake_record():
    """Build a dummy sequence record populated with nine CDS/gene pairs.

    Each CDS is tagged "gene1" through "gene9" and placed at a fixed
    location; the CDS features at indices 3 and 5 (gene4 and gene6) also
    receive a CORE gene function.
    """
    record = helpers.DummyRecord(
        seq=Seq("acgtacgtacgtacgtacgtacgtacgtacgtacgtacgtacgtacgta" * 196))
    record.name = "test"

    # (start, end, strand) of every feature, in insertion order
    coordinates = [
        (100, 300, 1),
        (101, 299, -1),
        (250, 350, 1),
        (500, 1000, 1),
        (1111, 1500, -1),
        (2000, 2200, -1),
        (2999, 4000, 1),
        (4321, 5678, 1),
        (6660, 9000, -1),
    ]
    locations = [
        FeatureLocation(start, end, strand=strand)
        for start, end, strand in coordinates
    ]

    core_indices = (3, 5)
    for index, location in enumerate(locations):
        tag = "gene" + str(index + 1)
        cds = helpers.DummyCDS(locus_tag=tag)
        cds.location = location
        record.add_cds_feature(cds)
        record.add_gene(secmet.Gene(location, locus_tag=tag))
        if index in core_indices:
            cds.gene_functions.add(secmet.qualifiers.GeneFunction.CORE,
                                   "testtool", "dummy", "product")
    return record
def setUp(self):
    """Create one region per product, each filled with seven dummy CDSs.

    Every CDS gets a dummy NRPS/PKS qualifier whose domain names are looked
    up from gen_domain_names() by locus tag.
    """
    self.genes = []
    self.regions = []
    domain_names = self.gen_domain_names()
    # tags 'a' to 'f', with the seventh CDS using the special tag "all"
    locus_tags = [chr(ord('a') + offset) for offset in range(6)] + ["all"]

    for product in ['not_atpks', 'transatpks']:
        cluster = helpers.DummyProtocluster(1, 2, product=product)
        candidate_cluster = helpers.DummyCandidateCluster([cluster])
        region = Region(candidate_clusters=[candidate_cluster])
        self.regions.append(region)
        for locus_tag in locus_tags:
            cds = helpers.DummyCDS(1, 2, locus_tag=locus_tag)
            cds.product = product
            cds.nrps_pks = DummyNRPSQualfier()
            cds.nrps_pks.domain_names = domain_names["nrpspksdomains_" + locus_tag]
            cds.cluster = cluster
            cluster.add_cds(cds)
            self.genes.append(cds)
            region.add_cds(cds)
            # adding the CDS to the region must link it back to that region
            assert cds.region == region

    self.predictions = [
        'redmxmal', 'ccmal', 'ohemal', 'ohmxmal', 'ohmmal',
        'ccmmal', 'emal', 'redmmal', 'mmal', 'ccmxmal',
        'mxmal', 'redemal', 'ohmal', 'mal', 'ccemal',
    ]
def test_classification_with_colon(self):
    """Check that SMCOG descriptions containing ':' survive classification."""
    # The SMCOG id and description are stored in one string separated by ':',
    # so a description that itself contains ':' must still be handled.
    # The test gene is AQF52_5530 from CP013129.1.
    translation = (
        "MDTHQREEDPVAARRDRTHYLYLAVIGAVLLGIAVGFLAPGVAVELKPLGTGFVN"
        "LIKMMISPIIFCTIVLGVGSVRKAAKVGAVGGLALGYFLVMSTVALAIGLLVGNL"
        "LEPGSGLHLTKEIAEAGAKQAEGGGESTPDFLLGIIPTTFVSAFTEGEVLQTLLV"
        "ALLAGFALQAMGAAGEPVLRGIGHIQRLVFRILGMIMWVAPVGAFGAIAAVVGAT"
        "GAAALKSLAVIMIGFYLTCGLFVFVVLGAVLRLVAGINIWTLLRYLGREFLLILS"
        "TSSSESALPRLIAKMEHLGVSKPVVGITVPTGYSFNLDGTAIYLTMASLFVAEAM"
        "GDPLSIGEQISLLVFMIIASKGAAGVTGAGLATLAGGLQSHRPELVDGVGLIVGI"
        "DRFMSEARALTNFAGNAVATVLVGTWTKEIDKARVTEVLAGNIPFDEKTLVDDHA"
        "PVPVPDQRAEGGEEKARAGV")
    length = len(translation)

    cds = helpers.DummyCDS(0, length)
    cds.translation = translation
    results = smcogs.classify("test", [cds], get_config())
    best_hit = results.best_hits[cds.get_name()]
    assert best_hit.hit_id == "SMCOG1212:sodium:dicarboxylate symporter"

    record = helpers.DummyRecord(seq=translation)
    record.add_cds_feature(cds)
    record.add_protocluster(helpers.DummyProtocluster(0, length))

    # mishandling the extra colons in the hit id would make this call crash
    results.add_to_record(record)

    functions = cds.gene_functions.get_by_tool("smcogs")
    assert len(functions) == 1
    expected_prefix = (
        "transport (smcogs) SMCOG1212:sodium:dicarboxylate symporter"
        " (Score: 416; E-value: 2.3e-126)")
    assert str(functions[0]).startswith(expected_prefix)
def setUp(self):
    """Create one dummy cluster per product, each holding seven dummy CDSs.

    Every CDS gets a dummy NRPS/PKS qualifier whose domain names are looked
    up from gen_domain_names() by locus tag.
    """
    self.genes = []
    self.clusters = []
    domain_names = self.gen_domain_names()
    # tags 'a' to 'f', with the seventh CDS using the special tag "all"
    locus_tags = [chr(ord('a') + offset) for offset in range(6)] + ["all"]

    for product in ['not_atpks', 'transatpks']:
        cluster = helpers.DummyCluster(1, 2, products=[product])
        assert cluster.products == (product, )
        for locus_tag in locus_tags:
            cds = helpers.DummyCDS(1, 2, locus_tag=locus_tag)
            cds.product = product
            cds.nrps_pks = DummyNRPSQualfier()
            cds.nrps_pks.domain_names = domain_names["nrpspksdomains_" + locus_tag]
            cds.cluster = cluster
            cluster.add_cds(cds)
            self.genes.append(cds)
        self.clusters.append(cluster)

    self.predictions = [
        'redmxmal', 'ccmal', 'ohemal', 'ohmxmal', 'ohmmal',
        'ccmmal', 'emal', 'redmmal', 'mmal', 'ccmxmal',
        'mxmal', 'redemal', 'ohmal', 'mal', 'ccemal',
    ]
def test_cds_removal(self):
    """Removing a CDS from the record must also drop it from its cluster."""
    record = Record(Seq("A" * 1000))
    cluster = helpers.DummyCluster(0, 1000)
    record.add_cluster(cluster)

    removed = helpers.DummyCDS(0, 100, locus_tag="A")
    kept = helpers.DummyCDS(200, 300, locus_tag="B")
    for cds in (removed, kept):
        record.add_cds_feature(cds)
    assert len(record.get_cds_features()) == 2
    assert len(cluster.cds_children) == 2

    record.remove_cds_feature(removed)

    remaining = record.get_cds_features()
    assert len(remaining) == 1
    assert len(cluster.cds_children) == 1
    # the record and the cluster must refer to the very same surviving CDS
    assert remaining[0] is list(cluster.cds_children)[0]
    assert remaining[0].locus_tag == "B"
def test_cds_cluster_linkage(self):
    """Each cluster added must pick up exactly the two CDSs it overlaps."""
    record = Record("A"*200)
    cds_spans = [(50, 100), (10, 90), (0, 9), (150, 200)]
    for cds_start, cds_end in cds_spans:
        record.add_cds_feature(helpers.DummyCDS(cds_start, cds_end))

    cluster_spans = [(10, 120), (5, 110), (10, 160), (45, 200)]
    for cluster_start, cluster_end in cluster_spans:
        # start from a clean slate so only the new cluster is present
        record.clear_clusters()
        cluster = helpers.DummyCluster(cluster_start, cluster_end)
        record.add_cluster(cluster)
        assert len(cluster.cds_children) == 2
        assert all(cds.overlaps_with(cluster) for cds in cluster.cds_children)
def test_add_when_regenerating(self):
    """Rebuilding results from JSON must re-add domains and motifs to the record."""
    record = helpers.DummyRecord(seq="A"*3800)
    record.id = 'Y16952.3.trimmed'
    for start, end, locus_tag in [(0, 1800, "two_domains"),
                                  (1900, 4000, "one_domain"),
                                  (4100, 4400, "no_hits")]:
        record.add_cds_feature(helpers.DummyCDS(start=start, end=end, locus_tag=locus_tag))

    amp_binding = {'bitscore': 360.7, 'query_end': 428, 'evalue': 2.1e-110,
                   'hit_id': 'AMP-binding', 'query_start': 35}
    pcp = {'bitscore': 66.0, 'query_end': 569, 'evalue': 6.3e-21,
           'hit_id': 'PCP', 'query_start': 504}
    ech = {'bitscore': 76.9, 'query_end': 382, 'evalue': 3.9e-24,
           'hit_id': 'ECH', 'query_start': 170}
    first_motif = {'query_start': 18, 'evalue': 4.7e-05, 'query_end': 30,
                   'bitscore': 16.1, 'hit_id': 'C1_dual_004-017'}
    second_motif = {'query_start': 38, 'evalue': 1.4e-19, 'query_end': 78,
                    'bitscore': 62.4, 'hit_id': 'C2_DCL_024-062'}

    two_domain_json = {'domain_hmms': [amp_binding, pcp],
                       'motif_hmms': [],
                       'type': 'NRPS'}
    one_domain_json = {'domain_hmms': [ech],
                       'motif_hmms': [first_motif, second_motif],
                       'type': 'other'}
    # the "no_hits" CDS deliberately has no entry in the serialised results
    serialised = {'cds_results': {'two_domains': two_domain_json,
                                  'one_domain': one_domain_json},
                  'record_id': record.id,
                  'schema_version': 1}

    # nothing may be present on the record before regeneration
    assert not record.get_antismash_domains()
    assert not record.get_cds_motifs()

    results = nrps_pks_domains.domain_identification.NRPSPKSDomains.from_json(serialised, record)
    assert len(results.cds_results) == 2
    assert len(record.get_cds_motifs()) == 2
    assert len(record.get_antismash_domains()) == 3

    two_domains = record.get_cds_by_name("two_domains")
    assert two_domains.nrps_pks.type == "NRPS"
    assert len(two_domains.nrps_pks.domains) == 2
    assert not two_domains.motifs

    one_domain = record.get_cds_by_name("one_domain")
    assert one_domain.nrps_pks.type == "other"
    assert len(one_domain.nrps_pks.domains) == 1
    assert len(one_domain.motifs) == 2

    no_hits = record.get_cds_by_name("no_hits")
    assert not no_hits.nrps_pks
def test_classification_with_colon(self):
    """Check that a hit id containing extra ':' is annotated correctly."""
    # The SMCOG id and description are stored in one string separated by ':',
    # so a description that itself contains ':' must still be handled.
    cds = helpers.DummyCDS(locus_tag="test")
    record = helpers.DummyRecord(features=[cds], seq="A" * 100)
    record.add_cluster(helpers.DummyCluster(0, 100))

    results = SMCOGResults(record.id)
    hit = HMMResult("SMCOG1212:sodium:dicarboxylate_symporter",
                    0, 100, 2.3e-126, 416)
    results.best_hits[cds.get_name()] = hit
    results.add_to_record(record)

    functions = cds.gene_functions.get_by_tool("smcogs")
    assert len(functions) == 1
    expected_prefix = (
        "transport (smcogs) SMCOG1212:sodium:dicarboxylate_symporter"
        " (Score: 416; E-value: 2.3e-126)")
    assert str(functions[0]).startswith(expected_prefix)
def test_add_when_regenerating(self):
    """Rebuilding results from JSON must restore domains, motifs and modules."""
    results_class = nrps_pks_domains.domain_identification.NRPSPKSDomains

    record = helpers.DummyRecord(seq="A" * 3800)
    record.id = 'Y16952.3.trimmed'
    for start, end, locus_tag in [(0, 1800, "two_domains"),
                                  (1900, 4000, "one_domain"),
                                  (4100, 4400, "no_hits")]:
        record.add_cds_feature(
            helpers.DummyCDS(start=start, end=end, locus_tag=locus_tag))

    a_domain = {
        'bitscore': 360.7,
        'query_end': 428,
        'evalue': 2.1e-110,
        'hit_id': 'AMP-binding',
        'query_start': 35,
    }
    pcp = {
        'bitscore': 66.0,
        'query_end': 569,
        'evalue': 6.3e-21,
        'hit_id': 'PCP',
        'query_start': 504,
    }
    ech = {
        'bitscore': 76.9,
        'query_end': 382,
        'evalue': 3.9e-24,
        'hit_id': 'ECH',
        'query_start': 170,
    }
    first_motif = {
        'query_start': 18,
        'evalue': 4.7e-05,
        'query_end': 30,
        'bitscore': 16.1,
        'hit_id': 'C1_dual_004-017',
    }
    second_motif = {
        'query_start': 38,
        'evalue': 1.4e-19,
        'query_end': 78,
        'bitscore': 62.4,
        'hit_id': 'C2_DCL_024-062',
    }

    # the single module reuses the very same domain dicts as 'domain_hmms'
    module = {"components": [{"domain": a_domain}, {"domain": pcp}]}
    two_domain_json = {
        'domain_hmms': [a_domain, pcp],
        'motif_hmms': [],
        'modules': [module],
        "ks_subtypes": [],
        'type': 'NRPS',
    }
    one_domain_json = {
        'domain_hmms': [ech],
        'motif_hmms': [first_motif, second_motif],
        "modules": [],  # arbitrarily none
        "ks_subtypes": [],
        'type': 'other',
    }
    # the "no_hits" CDS deliberately has no entry in the serialised results
    serialised = {
        'cds_results': {
            'two_domains': two_domain_json,
            'one_domain': one_domain_json,
        },
        'record_id': record.id,
        'modules': [],
        'schema_version': results_class.schema_version,
    }

    # nothing may be present on the record before regeneration
    assert not record.get_antismash_domains()
    assert not record.get_cds_motifs()

    results = results_class.from_json(serialised, record)
    assert len(results.cds_results) == 2
    assert len(record.get_cds_motifs()) == 2
    assert len(record.get_antismash_domains()) == 3
    tool_domains = record.get_antismash_domains_by_tool("nrps_pks_domains")
    assert len(tool_domains) == 3

    two_domains = record.get_cds_by_name("two_domains")
    assert two_domains.nrps_pks.type == "NRPS"
    assert len(two_domains.nrps_pks.domains) == 2
    assert not two_domains.motifs
    modules = results.cds_results[two_domains].modules
    assert modules
    first_module = modules[0]
    assert first_module._loader._domain.hit_id == "AMP-binding"
    assert first_module._carrier_protein._domain.hit_id == "PCP"
    assert not two_domains.modules  # added in add_to_record

    one_domain = record.get_cds_by_name("one_domain")
    assert one_domain.nrps_pks.type == "other"
    assert len(one_domain.nrps_pks.domains) == 1
    assert len(one_domain.motifs) == 2
    assert not results.cds_results[one_domain].modules
    assert not one_domain.modules

    no_hits = record.get_cds_by_name("no_hits")
    assert not no_hits.nrps_pks