def preprocess(self, buildAnnotations=True):
    """Prepare the combined genome, annotation, GFF and FASTA files.

    Steps, in order:

    1. ``fasta.go`` combines all genome files under ``self.genome_dir``
       into a single genome fasta file (``self.all_fasta`` /
       ``self.six_fasta`` and their index paths are passed along).
    2. A ``fasta.Indexer`` over ``self.all_fasta`` is indexed and loaded.
    3. ``intergene.go`` and ``annotation.go`` are run on
       ``self.genome_dir``; the indexer is handed to ``annotation.go``.
    4. Every ``.gff`` file found under ``self.genome_dir`` is appended to
       ``self.gff``.
    5. Every ``.fna`` file found under ``self.genome_dir`` is appended to
       a temporary file, which is passed to ``faa.reformat`` to produce
       ``self.faa``; the temporary file is then removed.
    6. A ``fasta.Indexer`` over ``self.faa`` / ``self.faaidx`` is indexed.

    Args:
        buildAnnotations: Accepted for backward compatibility only; it is
            not read, so annotations are always built.
    """
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Preprocessing")

    def _append_files_with_ext(extension, outhandle):
        # Append every file under self.genome_dir whose extension matches,
        # closing each input handle as soon as it has been copied.
        for root, _subfolders, files in os.walk(self.genome_dir):
            for fname in files:
                if os.path.splitext(fname)[1] != extension:
                    continue
                with open(os.path.join(root, fname)) as inhandle:
                    shutil.copyfileobj(inhandle, outhandle)

    # Combine all genome files into a single genome fasta file
    # https://github.com/mortonjt/Boa/blob/master/src/format/fasta.py
    fasta.go(self.genome_dir,
             self.all_fasta, self.all_faidx,
             self.six_fasta, self.six_faidx)
    indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
    indexer.index()
    indexer.load()

    # https://github.com/mortonjt/Boa/blob/master/src/genome/intergene.py
    intergene.go(self.genome_dir, self.intergenes)
    # https://github.com/mortonjt/Boa/blob/master/src/annotation/annotation.py
    annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

    # Combine all gff files together
    with open(self.gff, 'w') as outhandle:
        _append_files_with_ext(".gff", outhandle)

    # The pid keeps the scratch file name distinct between processes that
    # share a working directory.
    tmpfile = "tmp%d.faa" % (os.getpid())
    try:
        with open(tmpfile, 'w') as outhandle:
            # NOTE(review): this gathers ".fna" files although the result
            # is written to a ".faa" scratch file and handed to
            # faa.reformat -- confirm the extension is intended.
            _append_files_with_ext(".fna", outhandle)
        faa.reformat(tmpfile, self.faa)
    finally:
        # Do not leave the scratch file behind when a step above fails.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()
def preprocess(self):
    """Prepare the combined genome, annotation, GFF and protein files.

    Steps, in order:

    1. ``fasta.go`` combines all genome files under ``self.genome_dir``
       into a single genome fasta file (``self.all_fasta`` /
       ``self.six_fasta`` and their index paths are passed along).
    2. A ``fasta.Indexer`` over ``self.all_fasta`` is indexed and loaded.
    3. ``intergene.go`` and ``annotation.go`` are run on
       ``self.genome_dir``; the indexer is handed to ``annotation.go``.
    4. Every ``.gff`` file found under ``self.genome_dir`` is concatenated
       into ``self.gff``.
    5. Every ``.faa`` file found under ``self.genome_dir`` is concatenated
       into a temporary file, which ``faa.reformat`` turns into
       ``self.faa``; the temporary file is then deleted.
    6. A ``fasta.Indexer`` over ``self.faa`` / ``self.faaidx`` is indexed.
    """
    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Preprocessing")

    def _concatenate(extension, destination):
        # Write the contents of every file under self.genome_dir that has
        # the given extension into the file at `destination`. Both the
        # output and each input handle are closed even if copying fails.
        with open(destination, 'w') as outhandle:
            for root, _subfolders, files in os.walk(self.genome_dir):
                for fname in files:
                    if os.path.splitext(fname)[1] == extension:
                        with open(os.path.join(root, fname)) as inhandle:
                            shutil.copyfileobj(inhandle, outhandle)

    # Combine all genome files into a single genome fasta file
    fasta.go(self.genome_dir,
             self.all_fasta, self.all_faidx,
             self.six_fasta, self.six_faidx)
    indexer = fasta.Indexer(self.all_fasta, self.all_faidx)
    indexer.index()
    indexer.load()

    intergene.go(self.genome_dir, self.intergenes)
    annotation.go(self.genome_dir, self.annotated_genes, index_obj=indexer)

    # Combine all gff files together
    _concatenate(".gff", self.gff)

    # The pid keeps the scratch file name distinct between processes that
    # share a working directory.
    tmpfile = "tmp%d.faa" % (os.getpid())
    try:
        _concatenate(".faa", tmpfile)
        faa.reformat(tmpfile, self.faa)
    finally:
        # Do not leave the scratch file behind when a step above fails.
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    faaindex = fasta.Indexer(self.faa, self.faaidx)
    faaindex.index()