Пример #1
0
    def preprocess(self,buildAnnotations=True):
        """Prepare the combined genome, annotation, GFF and protein files.

        Steps, in order:
          1. ``fasta.go`` combines the genome files under ``self.genome_dir``
             and the result is indexed with ``fasta.Indexer``.
          2. ``intergene.go`` and ``annotation.go`` are run over
             ``self.genome_dir``.
          3. Every ``.gff`` file under ``self.genome_dir`` is concatenated
             into ``self.gff``.
          4. Every ``.fna`` file under ``self.genome_dir`` is concatenated
             into a temporary file, passed through ``faa.reformat`` to
             produce ``self.faa``, and the result is indexed.

        :param buildAnnotations: accepted for backward compatibility; it is
            not consulted by this method.
        """
        # Parenthesised form prints the same text under Python 2 and Python 3.
        print("Preprocessing")

        #Combine all genome files into a single genome fasta file
        #https://github.com/mortonjt/Boa/blob/master/src/format/fasta.py
        fasta.go(self.genome_dir,
                 self.all_fasta,
                 self.all_faidx,
                 self.six_fasta,
                 self.six_faidx)
        indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
        indexer.index()
        indexer.load()

        #https://github.com/mortonjt/Boa/blob/master/src/genome/intergene.py
        intergene.go(self.genome_dir, self.intergenes)

        #https://github.com/mortonjt/Boa/blob/master/src/annotation/annotation.py
        annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

        #Combine all gff files together
        with open(self.gff, 'w') as outhandle:
            self._append_files_with_extension(".gff", outhandle)

        # The process id keeps the scratch file name distinct per process.
        tmpfile = "tmp%d.faa" % (os.getpid())
        try:
            # NOTE(review): this pass collects ".fna" files although the
            # scratch file and the reformat target are named ".faa" --
            # confirm that ".fna" is the intended extension.
            with open(tmpfile, 'w') as outhandle:
                self._append_files_with_extension(".fna", outhandle)
            faa.reformat(tmpfile, self.faa)
        finally:
            # Remove the scratch file even when copying or reformatting fails.
            if os.path.exists(tmpfile):
                os.remove(tmpfile)

        faaindex = fasta.Indexer(self.faa, self.faaidx)
        faaindex.index()

    def _append_files_with_extension(self, extension, outhandle):
        """Append each file under ``self.genome_dir`` ending in *extension* to *outhandle*."""
        for root, _subfolders, files in os.walk(self.genome_dir):
            for fname in files:
                if os.path.splitext(fname)[1] != extension:
                    continue
                # Close every input promptly instead of relying on the
                # garbage collector to release the handle.
                with open(os.path.join(root, fname)) as inhandle:
                    shutil.copyfileobj(inhandle, outhandle)
Пример #2
0
 def preprocess(self):
     """Prepare the combined genome, annotation, GFF and protein files.

     Steps, in order:
       1. ``fasta.go`` combines the genome files under ``self.genome_dir``
          and the result is indexed with ``fasta.Indexer``.
       2. ``intergene.go`` and ``annotation.go`` are run over
          ``self.genome_dir``.
       3. Every ``.gff`` file under ``self.genome_dir`` is concatenated
          into ``self.gff``.
       4. Every ``.faa`` file under ``self.genome_dir`` is concatenated
          into a temporary file, passed through ``faa.reformat`` to
          produce ``self.faa``, and the result is indexed.
     """
     # Parenthesised form prints the same text under Python 2 and Python 3.
     print("Preprocessing")

     #Combine all genome files into a single genome fasta file
     fasta.go(self.genome_dir,
              self.all_fasta,
              self.all_faidx,
              self.six_fasta,
              self.six_faidx)
     indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
     indexer.index()
     indexer.load()
     intergene.go(self.genome_dir, self.intergenes)
     annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

     #Combine all gff files together
     with open(self.gff, 'w') as outhandle:
         self._append_files_with_extension(".gff", outhandle)

     # The process id keeps the scratch file name distinct per process.
     tmpfile = "tmp%d.faa" % (os.getpid())
     try:
         with open(tmpfile, 'w') as outhandle:
             self._append_files_with_extension(".faa", outhandle)
         faa.reformat(tmpfile, self.faa)
     finally:
         # Remove the scratch file even when copying or reformatting fails.
         if os.path.exists(tmpfile):
             os.remove(tmpfile)

     faaindex = fasta.Indexer(self.faa, self.faaidx)
     faaindex.index()

 def _append_files_with_extension(self, extension, outhandle):
     """Append each file under ``self.genome_dir`` ending in *extension* to *outhandle*."""
     for root, _subfolders, files in os.walk(self.genome_dir):
         for fname in files:
             if os.path.splitext(fname)[1] != extension:
                 continue
             # Close every input promptly instead of relying on the
             # garbage collector to release the handle.
             with open(os.path.join(root, fname)) as inhandle:
                 shutil.copyfileobj(inhandle, outhandle)