示例#1
0
文件: postHMMER.py 项目: mortonjt/Boa
from itol import *
from muscle import Muscle
from mafft import MAFFT
import clique_filter

import quorum

if __name__=="__main__":
    # Ad-hoc driver: parse the per-function HMMER outputs, run the hits
    # through GFF.call_orfs and write the results next to the inputs.
    # All locations below are hard-coded.
    faidx = "/home/mortonjt/Projects/Bacfinder/workspace/quorum/data/all_trans.fai"
    gffFile = "/home/mortonjt/Projects/Bacfinder/workspace/quorum/data/all.gff"
    folder = "/home/mortonjt/Projects/Bacfinder/workspace/quorum/intermediate"
    if os.path.exists("test.pickle"):
        # A pickle in the working directory short-circuits the parsing below.
        # NOTE(review): unpickling can execute arbitrary code -- only load a
        # test.pickle that was produced locally.
        with open("test.pickle",'rb') as handle:
            all_hits = cPickle.load(handle)
    else:

        toxin_hits     = hmmer.parse("%s/toxin.out"%folder)
        modifier_hits  = hmmer.parse("%s/modifier.out"%folder)
        immunity_hits  = hmmer.parse("%s/immunity.out"%folder)
        regulator_hits = hmmer.parse("%s/regulator.out"%folder)
        transport_hits = hmmer.parse("%s/transport.out"%folder)

        # NOTE: this rebinds the name `gff` to the GFF instance, so whatever
        # provided `gff.GFF` is no longer reachable under that name afterwards.
        gff = gff.GFF(gff_file=gffFile,fasta_index=faidx)
        toxin_hits     = gff.call_orfs(toxin_hits    )
        modifier_hits  = gff.call_orfs(modifier_hits )
        immunity_hits  = gff.call_orfs(immunity_hits )
        regulator_hits = gff.call_orfs(regulator_hits)
        transport_hits = gff.call_orfs(transport_hits)
        # Use context managers so every output file is flushed and closed
        # deterministically instead of relying on garbage collection.
        with open("%s/toxin_orfs.out"%folder,'w') as handle:
            handle.write(hmmer.hmmerstr(toxin_hits))
        with open("%s/modifier_orfs.out"%folder,'w') as handle:
            handle.write(hmmer.hmmerstr(modifier_hits))
        with open("%s/immunity_orfs.out"%folder,'w') as handle:
            handle.write(hmmer.hmmerstr(immunity_hits))
        with open("%s/regulator_orfs.out"%folder,'w') as handle:
            handle.write(hmmer.hmmerstr(regulator_hits))
示例#2
0
    def cliqueFilter(self,clique_radius=50000,threshold=62,functions = ["toxin","modifier","immunity","transport","regulator"]):
        print "Clique filtering","Looking for cliques with",functions
        
        toxin_hits     = hmmer.parse("%s/toxin.out"%self.intermediate)
        modifier_hits  = hmmer.parse("%s/modifier.out"%self.intermediate)
        immunity_hits  = hmmer.parse("%s/immunity.out"%self.intermediate)
        regulator_hits = hmmer.parse("%s/regulator.out"%self.intermediate)
        transport_hits = hmmer.parse("%s/transport.out"%self.intermediate)
        faaindex = fasta.Indexer(self.faa,self.faaidx)
        faaindex.index()
        faaindex.load()
        genefile = gff.GFF(self.gff,fasta_index=self.faaidx)
        genefile.indexdb()
        toxin_hits     = genefile.call_orfs(toxin_hits    ,faaindex)
        modifier_hits  = genefile.call_orfs(modifier_hits ,faaindex)
        immunity_hits  = genefile.call_orfs(immunity_hits ,faaindex)
        regulator_hits = genefile.call_orfs(regulator_hits,faaindex)
        transport_hits = genefile.call_orfs(transport_hits,faaindex)

        toxin_hits = threshold_filter.filter(toxin_hits,threshold)
        modifier_hits = threshold_filter.filter(modifier_hits,threshold)
        immunity_hits = threshold_filter.filter(immunity_hits,threshold)
        regulator_hits = threshold_filter.filter(regulator_hits,threshold)
        transport_hits = threshold_filter.filter(transport_hits,threshold)

        all_hits = toxin_hits+modifier_hits+immunity_hits+regulator_hits+transport_hits
        seq_dict = {x[0]:x[1] for x in all_hits}
   
        del all_hits
        
        toxin_ids,toxin_seqs = zip(*toxin_hits)
        modifier_ids,modifier_seqs = zip(*modifier_hits)
        immunity_ids,immunity_seqs = zip(*immunity_hits)
        regulator_ids,regulator_seqs = zip(*regulator_hits)
        transport_ids,transport_seqs = zip(*transport_hits)
        
        del toxin_hits
        del modifier_hits
        del immunity_hits
        del regulator_hits
        del transport_hits
        
        all_ids = toxin_ids+modifier_ids+immunity_ids+regulator_ids+transport_ids
        all_ids = interval_filter.unique(all_ids)
        # #Sort by start/end position and genome name
        all_ids=sorted(all_ids,key=lambda x: x[6])   
        all_ids=sorted(all_ids,key=lambda x: x[5])
        all_ids=sorted(all_ids,key=lambda x: x[0])
        #all_ids=sorted(all_ids,key=lambda x: x[-3])
        
        del toxin_ids
        del modifier_ids
        del immunity_ids
        del regulator_ids
        del transport_ids
        print "all ids",len(all_ids)
        print '\n'.join(map(str,all_ids[:10]))
        #Find operons with at least a toxin and a transport
        clusters = clique_filter.findContextGeneClusters(all_ids,
                                                         radius=clique_radius,
                                                         backtrans=False,
                                                         functions=["toxin","transport"])
        print "Clusters: ",len(clusters)
        outhandle = open(self.operons_out,'w')
        self.writeClusters(clusters,seq_dict,outhandle) 
        outhandle.close()
        #Predict operons based on just context genes
        clusters = clique_filter.findContextGeneClusters(all_ids,
                                                         radius=clique_radius,
                                                         backtrans=False,   
                                                         functions=["modifier","regulator","immunity","transport"])
        print "Clusters: ",len(clusters)
        outhandle = open(self.pred_operons_out,'w')
        self.writeClusters(clusters,seq_dict,outhandle) 
        outhandle.close()