from itol import *
from muscle import Muscle
from mafft import MAFFT
import clique_filter
import quorum

if __name__ == "__main__":
    # Ad-hoc driver: parse the per-function HMMER outputs, call ORFs for the
    # hits against the GFF annotation, and write the results back out as
    # "<function>_orfs.out" files in the intermediate folder.
    #
    # NOTE(review): os, cPickle, hmmer and gff are used below but are not
    # imported in the visible lines -- presumably they are provided by
    # `from itol import *` or by an earlier part of the file; confirm.
    faidx = "/home/mortonjt/Projects/Bacfinder/workspace/quorum/data/all_trans.fai"
    gffFile = "/home/mortonjt/Projects/Bacfinder/workspace/quorum/data/all.gff"
    folder = "/home/mortonjt/Projects/Bacfinder/workspace/quorum/intermediate"

    if os.path.exists("test.pickle"):
        # NOTE(review): unpickling can execute arbitrary code; this is only
        # acceptable while test.pickle is a locally produced, trusted cache.
        with open("test.pickle", 'rb') as pickle_in:
            all_hits = cPickle.load(pickle_in)
        # NOTE(review): all_hits is not used by any visible code, and the
        # else-branch never assigns it -- the remainder of this driver may
        # be missing from this view.
    else:
        toxin_hits = hmmer.parse("%s/toxin.out" % folder)
        modifier_hits = hmmer.parse("%s/modifier.out" % folder)
        immunity_hits = hmmer.parse("%s/immunity.out" % folder)
        regulator_hits = hmmer.parse("%s/regulator.out" % folder)
        transport_hits = hmmer.parse("%s/transport.out" % folder)

        # This rebinds the module-level name `gff` from the module to a GFF
        # instance; the binding is kept as-is in case later code relies on it.
        gff = gff.GFF(gff_file=gffFile, fasta_index=faidx)
        toxin_hits = gff.call_orfs(toxin_hits)
        modifier_hits = gff.call_orfs(modifier_hits)
        immunity_hits = gff.call_orfs(immunity_hits)
        regulator_hits = gff.call_orfs(regulator_hits)
        transport_hits = gff.call_orfs(transport_hits)

        # Write each result through a context manager so the handle is
        # flushed and closed deterministically instead of being left to the
        # garbage collector.
        # NOTE(review): no transport_orfs.out is written in the visible
        # code; preserved as-is -- confirm whether that is intentional.
        for func_name, func_hits in (("toxin", toxin_hits),
                                     ("modifier", modifier_hits),
                                     ("immunity", immunity_hits),
                                     ("regulator", regulator_hits)):
            with open("%s/%s_orfs.out" % (folder, func_name), 'w') as orf_out:
                orf_out.write(hmmer.hmmerstr(func_hits))
def cliqueFilter(self,clique_radius=50000,threshold=62,functions = ["toxin","modifier","immunity","transport","regulator"]):
    """Cluster co-located HMMER hits and write two operon prediction files.

    Steps, in order:
      1. Parse ``<self.intermediate>/<function>.out`` for the toxin,
         modifier, immunity, regulator and transport functions.
      2. Build and load the FASTA index (``self.faa`` / ``self.faaidx``) and
         index the GFF file (``self.gff``).
      3. Call ORFs for every hit list, then pass each list through
         ``threshold_filter.filter`` with ``threshold``.
      4. Collect the hit ids, de-duplicate them with
         ``interval_filter.unique`` and sort them.
      5. Run ``clique_filter.findContextGeneClusters`` twice and write the
         results with ``self.writeClusters``: toxin+transport clusters go to
         ``self.operons_out``; modifier/regulator/immunity/transport clusters
         go to ``self.pred_operons_out``.

    Parameters:
      clique_radius -- forwarded as ``radius`` to findContextGeneClusters.
      threshold     -- forwarded to ``threshold_filter.filter``.
      functions     -- only echoed in the progress message; the function
                       sets actually used for clustering are fixed below.
                       The default list is never mutated here.

    Returns None; results are written to the two output files.

    Each filtered hit is treated as an ``(id, sequence)`` pair, as the
    original ``x[0]`` / ``x[1]`` indexing did.
    """
    # Single-argument print calls produce the same text under the Python 2
    # print statement and the Python 3 print function.
    print("%s %s %s" % ("Clique filtering", "Looking for cliques with", functions))

    # Fixed processing order; it also determines which sequence wins in
    # seq_dict when the same id appears under more than one function.
    hit_functions = ("toxin", "modifier", "immunity", "regulator", "transport")

    parsed_hits = [hmmer.parse("%s/%s.out" % (self.intermediate, name))
                   for name in hit_functions]

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()
    faaindex.load()
    genefile = gff.GFF(self.gff, fasta_index=self.faaidx)
    genefile.indexdb()

    orf_hits = [genefile.call_orfs(hits, faaindex) for hits in parsed_hits]
    del parsed_hits
    filtered_hits = [threshold_filter.filter(hits, threshold) for hits in orf_hits]
    del orf_hits

    # Gather ids and sequences by iteration rather than ``zip(*hits)``:
    # unpacking ``zip(*[])`` raises ValueError as soon as one function has
    # no hits left after filtering.
    seq_dict = {}
    collected_ids = []
    for hits in filtered_hits:
        for hit in hits:
            seq_dict[hit[0]] = hit[1]
            collected_ids.append(hit[0])
    del filtered_hits

    # A tuple is passed on, matching what the original concatenation of
    # zip() results produced.
    all_ids = interval_filter.unique(tuple(collected_ids))
    del collected_ids

    # Sort by start/end position and genome name (per the original comment).
    # One sort on the composite key gives the same order as the original
    # three successive stable sorts on x[6], then x[5], then x[0].
    all_ids = sorted(all_ids, key=lambda x: (x[0], x[5], x[6]))

    print("%s %s" % ("all ids", len(all_ids)))
    print('\n'.join(map(str, all_ids[:10])))

    # Find operons with at least a toxin and a transport
    clusters = clique_filter.findContextGeneClusters(all_ids,
                                                     radius=clique_radius,
                                                     backtrans=False,
                                                     functions=["toxin","transport"])
    print("%s %s" % ("Clusters: ", len(clusters)))
    # ``with`` guarantees the handle is closed even if writeClusters raises.
    with open(self.operons_out, 'w') as outhandle:
        self.writeClusters(clusters, seq_dict, outhandle)

    # Predict operons based on just context genes
    clusters = clique_filter.findContextGeneClusters(all_ids,
                                                     radius=clique_radius,
                                                     backtrans=False,
                                                     functions=["modifier","regulator","immunity","transport"])
    print("%s %s" % ("Clusters: ", len(clusters)))
    with open(self.pred_operons_out, 'w') as outhandle:
        self.writeClusters(clusters, seq_dict, outhandle)