def setUp(self):
    """Prepare an isolated clusterfinder config and a populated dummy record.

    The record holds a 2000-base sequence. For every row of the fixture
    table one forward-strand CDS feature and one PFAM domain sharing the
    same location are added; the domain carries the row's probability
    (one row deliberately has ``None``).
    """
    cli_args = [
        "--cf-create-clusters",
        "--cf-mean-threshold", "0.6",
        "--cf-min-cds", "5",
        "--cf-min-pfams", "5",
    ]
    self.config = build_config(cli_args, modules=[clusterfinder], isolated=True)
    update_config({"enabled_cluster_types": []})

    self.record = DummyRecord(seq=Seq("A" * 2000))

    # rows are (start, end, probability, PFAM identifier)
    fixture_rows = [
        (10, 20, 0.1, 'PF77777'),
        (30, 40, 0.3, 'PF00106'),
        (50, 60, 0.4, 'PF00107'),
        (60, 70, 0.7, 'PF00109'),
        (70, 80, 0.98, 'PF08484'),
        (90, 100, 0.8, 'PF02401'),
        (100, 110, 0.32, 'PF04369'),
        (110, 120, 1.0, 'PF00128'),
        (130, 140, 0.2, 'PF77776'),
        (500, 505, None, 'PF77775'),
        (1010, 1020, 0.1, 'PF77774'),
        (1030, 1040, 0.3, 'PF00106'),
        (1050, 1060, 0.4, 'PF00107'),
        (1060, 1070, 0.7, 'PF00109'),
        (1070, 1080, 0.98, 'PF08484'),
        (1090, 1100, 0.8, 'PF02401'),
        (1100, 1110, 0.32, 'PF04369'),
        (1110, 1120, 1.0, 'PF00128'),
    ]
    for begin, stop, prob, identifier in fixture_rows:
        span = FeatureLocation(begin, stop, strand=1)
        # the CDS is named after its start coordinate
        cds = CDSFeature(span, locus_tag=str(begin), translation="A")
        self.record.add_cds_feature(cds)

        domain = DummyPFAMDomain(
            location=span,
            protein_start=begin + 1,
            protein_end=stop - 1,
            identifier=identifier,
        )
        domain.domain_id = "pfam_%d" % begin
        domain.probability = prob
        self.record.add_pfam_domain(domain)
def test_blank_records(self):
    """Expect a falsy result from get_gos_for_pfams for two sparse records.

    One record has no PFAM domains at all; the other has a single PFAM
    domain with the identifier ``PF00000``.
    """
    record_without_pfams = DummyRecord()

    record_with_unmapped_pfam = Record(Seq("ATGTTATGAGGGTCATAACAT", generic_dna))
    record_with_unmapped_pfam.add_pfam_domain(DummyPFAMDomain(identifier="PF00000"))

    for record in (record_without_pfams, record_with_unmapped_pfam):
        assert not pfam2go.get_gos_for_pfams(record)
def setUp(self):
    """Fill a fresh record with rebuilt aSDomains and p450 PFAM domains."""
    self.record = secmet.Record()

    # except for Thioesterase, all domains were found in BN001301.1
    # TE domains were found in Y16952
    domain_inputs = [
        ("PKS_KS.input", "PKS_KS"),
        ("AT.input", "PKS_AT"),
        ("ACP.input", "ACP"),
        ("DH.input", "PKS_DH"),
        ("KR.input", "PKS_KR"),
        ("TE.input", "Thioesterase"),
        ("ER.input", "PKS_ER"),
    ]
    for input_name, kind in domain_inputs:
        for rebuilt in rebuild_domains(input_name, kind):
            self.record.add_antismash_domain(rebuilt)

    # these PFAMs found in BN001301.1 with clusterhmmer, one was excluded
    # to avoid a Biopython SearchIO bug
    p450_path = path.get_full_path(__file__, 'data', "p450.input")
    p450_sequences = fasta.read_fasta(p450_path)
    for seq_name, seq in p450_sequences.items():
        p450 = DummyPFAMDomain(domain="p450", domain_id="PFAM_p450_" + seq_name)
        p450.translation = seq
        self.record.add_pfam_domain(p450)
def test_domains_of_interest(self):
    """Check which candidates ActiveSiteAnalysis keeps as domains of interest.

    A single domain built with ``domain="p450"`` is offered as the only
    candidate: it must be dropped when the analysis is created for
    "not-p450" and retained when created for "p450".
    """
    p450_domain = DummyPFAMDomain(domain="p450")

    def make_analysis(target):
        # fresh position/residue lists on every call, as with separate literals
        return ActiveSiteAnalysis(target, (p450_domain, ), "PKSI-KR.hmm2",
                                  [5, 6], ["C", "S"])

    mismatched = make_analysis("not-p450")
    assert mismatched.domains_of_interest == []

    matched = make_analysis("p450")
    assert matched.domains_of_interest == [p450_domain]
def set_dummy_with_pfams(pfam_ids: Dict[str, FeatureLocation]) -> DummyRecord:
    """Build a DummyRecord containing one dummy PFAM domain per mapping entry.

    Arguments:
        pfam_ids: a mapping of PFAM identifier to the location of its domain

    Returns:
        a DummyRecord whose features are the generated PFAM domains; each
        domain has a domain id of the form ``<identifier>.<start>.<end>``
        and fixed protein coordinates of 0 to 5
    """
    domains = [
        DummyPFAMDomain(
            location=location,
            protein_start=0,
            protein_end=5,
            identifier=identifier,
            domain_id='%s.%d.%d' % (identifier, location.start, location.end),
        )
        for identifier, location in pfam_ids.items()
    ]
    return DummyRecord(features=domains)
def test_add_results_to_record(self):
    """Check Pfam2GoResults.add_to_record against each PFAM domain's ontologies.

    The record holds three located PFAMs plus an extra domain that reuses
    the identifier ``PF00015.2``. After adding the results, each domain's
    ontology ids must match what the results report for that domain.
    """
    locations_by_pfam = {
        'PF00015.2': FeatureLocation(0, 3),
        'PF00351.1': FeatureLocation(0, 3),
        'PF00015.27': FeatureLocation(3, 6),
    }
    record = set_dummy_with_pfams(locations_by_pfam)

    duplicate = DummyPFAMDomain(identifier="PF00015.2")
    record.add_pfam_domain(duplicate)
    assert duplicate in record.get_pfam_domains()

    gos_by_pfam = pfam2go.get_gos_for_pfams(record)
    results = pfam2go.Pfam2GoResults(record.id, gos_by_pfam)
    results.add_to_record(record)
    assert duplicate.full_identifier == 'PF00015.2'

    for domain in record.get_pfam_domains():
        own_gos = sorted(results.get_all_gos(domain))
        assert sorted(domain.gene_ontologies.ids) == own_gos

        if domain.identifier != "PF00015":
            continue
        # make sure identical pfams (with different version numbers) all
        # have the same gene ontologies
        assert domain.version in [2, 27]
        duplicate_gos = sorted(results.get_all_gos(duplicate))
        assert sorted(domain.gene_ontologies.ids) == duplicate_gos
def setUp(self):
    """Create a p450 dummy PFAM domain and an Alignment built on it."""
    p450_domain = DummyPFAMDomain(domain="p450")
    self.domain = p450_domain
    self.alignment = Alignment(p450_domain, "WLAD-QGAR", "WLaer.rGA", 10, 19)