def moveBLASTResults(self):
        """Archive the BLAST results and relocate the per-organism joint BLAST files.

        Zips ``self.orgBlastResDir`` and moves its files to
        ``self.moveToDir_orgBlastResDir``.  Then, for every ``<id> <name>``
        line of ``self.orgListFile``, ``<name>.joint.blast`` under
        ``self.jointBlastDir`` is passed to ``self.moveFile_createLink``
        together with the same file name under ``self.moveToDir_jointBlastDir``.

        Nothing is returned; any exception is reported as a traceback.
        """
        try:
            print("moveBLASTResults")

            # The list is opened before anything is zipped or moved (as before),
            # so a missing list file aborts the whole step up front.  The
            # ``with`` block guarantees the handle is closed even on failure.
            with open(self.orgListFile) as orgListFile_fh:

                NGS_Util.zipDirectory(self.orgBlastResDir)
                NGS_Util.moveDirectoryFiles(self.orgBlastResDir, self.moveToDir_orgBlastResDir)

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()
                    jointBlastName = organismName + ".joint.blast"

                    orgRectifyBlast = NGS_Util.createFilePath(self.jointBlastDir, jointBlastName)
                    moveto_orgRectifyBlast = NGS_Util.createFilePath(self.moveToDir_jointBlastDir, jointBlastName)

                    self.moveFile_createLink(orgRectifyBlast, moveto_orgRectifyBlast)

        except Exception:
            # print_exc() already writes the traceback and returns None, so it
            # must not itself be printed.
            traceback.print_exc()
    def copySequenceFiles(self, srcDataDir):
        """Copy each listed organism's FASTA file into ``self.orgFastaDir``.

        For every ``<id> <name>`` line of ``self.orgListFile``, ``<name>.faa``
        is copied from ``srcDataDir`` unless a file of that name already
        exists under ``self.orgFastaDir``.

        srcDataDir -- directory that holds the source ``.faa`` files.

        Nothing is returned; any exception is reported as a traceback.
        """
        try:
            print("Copy Fasta Files from %s to %s" %(srcDataDir,self.orgFastaDir))

            # ``with`` closes the list file even when a copy fails mid-loop.
            with open(self.orgListFile) as orgListFile_fh:

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()
                    fastaName = organismName + ".faa"

                    if os.path.exists(NGS_Util.createFilePath(self.orgFastaDir, fastaName)):
                        print("\tDoing nothing (files already copied) for %s" % (organismName))
                        continue

                    orgFasta = NGS_Util.createFilePath(srcDataDir, fastaName)
                    NGS_Util.copyFile(orgFasta, self.orgFastaDir)
                    print("Copied fasta file for %s" % (organismName))

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
    def moveIPRScanResults(self):
        """Archive the InterProScan results and relocate the per-organism final files.

        Zips ``self.orgIPRScanDir`` and moves its files to
        ``self.moveToDir_orgIPRScanDir``.  Then, for every ``<id> <name>``
        line of ``self.orgListFile``, ``<name>.faa.raw.txt`` and
        ``<name>.faa.IPR.final.txt`` under ``self.InterProScan_EC_RAW_results``
        are passed to ``self.moveFile_createLink`` together with the same file
        name under ``self.moveToDir_InterProScan_EC_RAW_results``.

        Always returns ""; any exception is reported as a traceback.
        """
        try:
            print("moveIPRScanResults")

            # Opened first (as before) so a missing list aborts before any
            # zip/move; ``with`` closes the handle on every exit path.
            with open(self.orgListFile) as orgListFile_fh:

                NGS_Util.zipDirectory(self.orgIPRScanDir)
                NGS_Util.moveDirectoryFiles(self.orgIPRScanDir, self.moveToDir_orgIPRScanDir)

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()

                    # Raw result first, then the final IPR table (original order).
                    for suffix in (".faa.raw.txt", ".faa.IPR.final.txt"):
                        source = NGS_Util.createFilePath(self.InterProScan_EC_RAW_results, organismName + suffix)
                        target = NGS_Util.createFilePath(self.moveToDir_InterProScan_EC_RAW_results, organismName + suffix)

                        self.moveFile_createLink(source, target)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def moveGTGResults(self):
        """Archive the GTG result directories and relocate the per-organism KNN files.

        ``self.orgGTGBlastResDir`` and ``self.GTGBestHitsDir`` are each zipped
        and their files moved to the matching ``self.moveToDir_*`` directory.
        Then, for every ``<id> <name>`` line of ``self.orgListFile``,
        ``<id>.gtg.knn`` (keyed by the organism ID, not its name) under
        ``self.GTGKNNDir`` is passed to ``self.moveFile_createLink`` together
        with the same file name under ``self.moveToDir_GTGKNNDir``.

        Nothing is returned; any exception is reported as a traceback.
        """
        try:
            print("moveGTGResults")

            # Opened first (as before) so a missing list aborts before any
            # zip/move; ``with`` closes the handle on every exit path.
            with open(self.orgListFile) as orgListFile_fh:

                NGS_Util.zipDirectory(self.orgGTGBlastResDir)
                NGS_Util.moveDirectoryFiles(self.orgGTGBlastResDir, self.moveToDir_orgGTGBlastResDir)

                NGS_Util.zipDirectory(self.GTGBestHitsDir)
                NGS_Util.moveDirectoryFiles(self.GTGBestHitsDir, self.moveToDir_GTGBestHitsDir)

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()
                    knnName = organismNameID + ".gtg.knn"

                    org_gtg_knn_final = NGS_Util.createFilePath(self.GTGKNNDir, knnName)
                    moveto_org_gtg_knn_final = NGS_Util.createFilePath(self.moveToDir_GTGKNNDir, knnName)

                    self.moveFile_createLink(org_gtg_knn_final, moveto_org_gtg_knn_final)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
    def blast_org_vs_nr40_blast_formatted_11(self, organismName):
        """Run the ``qsub`` GTG BLAST script for one organism against nrdb40.

        The command line is ``qsub -t 1-1 <ScriptsDir.ClusterGTGBlast>``
        followed by the blastp path, ``self.nrdb40_blast_db``, the organism's
        ``.faa`` file, the output format ``11``, the result file and
        ``self.blastEValue``; it is executed through ``NGS_Util.executeCall``.

        Returns the path ``<organismName>.nrdb40_v2.txt`` under
        ``self.orgGTGBlastResDir``, or "" if an exception was caught.
        """
        try:
            queryFasta = NGS_Util.createFilePath(self.orgFastaDir, organismName+".faa")
            blastpBinary = NGS_Util.createFilePath(ScriptsDir.BlastDir,"blastp")
            resultFile = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40_v2.txt")

            commandParts = [
                "qsub -t 1-1 " + ScriptsDir.ClusterGTGBlast,
                blastpBinary,
                self.nrdb40_blast_db,
                queryFasta,
                str(11),
                resultFile,
                str(self.blastEValue),
            ]
            NGS_Util.executeCall(" ".join(commandParts))

            return resultFile

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
 def concatenate_Org_vs_Uniprot_ClusterBlast_results(self, organismName):
     """Join the per-fragment organism-vs-UniProt BLAST outputs into one file.

     Every fragment file ``<organismName>-vs-up_<i>.blast`` for
     i = 1..``self.numberOfFragments`` must exist in ``self.orgBlastResDir``;
     if so they are concatenated with ``cat`` into
     ``<organismName>-vs-up.blast``.

     Returns the joined file's path, or "" when a fragment is missing
     (a message is printed) or an exception was caught.
     """
     try:
         # all() stops at the first missing fragment, like the original break.
         everyFragmentPresent = all(
             os.path.exists(
                 NGS_Util.createFilePath(self.orgBlastResDir, organismName + "-vs-up_" + str(fragmentNo) + ".blast")
             )
             for fragmentNo in range(1, self.numberOfFragments + 1)
         )

         if not everyFragmentPresent:
             print(organismName + "-vs-Uniprot BLAST incomplete")
             return ""

         joinedFile = NGS_Util.createFilePath(self.orgBlastResDir, organismName + "-vs-up.blast" )
         fragmentGlob = NGS_Util.createFilePath(self.orgBlastResDir, organismName + "-vs-up_*")

         NGS_Util.executeCall("cat " + fragmentGlob + " > " + joinedFile)

         return joinedFile

     except Exception:
         # print_exc() returns None, so this also prints "None" (kept as-is).
         print(traceback.print_exc())

     return ""
Пример #7
0
def generatePics(currentRunDir):
    """Run the clustering-analysis R script and convert its EPS plot to PNG.

    ``r_analyseClustering.R`` (located via the module-level ``projectBinDir``)
    is piped into ``R --slave`` with ``currentRunDir`` as its argument, then
    ``convert`` turns ``PicsTables/SenSpeVsInfAll.eps`` into the ``.png`` of
    the same name.

    currentRunDir -- run directory handed to the R script.

    Always returns ""; any exception is reported as a traceback.
    """
    try:
        print("generatePics")

        rCommand = "".join(
            ["cat ", projectBinDir, "r_analyseClustering.R | R --slave --args ", currentRunDir]
        )
        NGS_Util.executeCall(rCommand)

        picsDir = NGS_Util.createDirectoryPath(currentRunDir, "PicsTables")
        epsFile = NGS_Util.createFilePath(picsDir, "SenSpeVsInfAll.eps")
        pngFile = NGS_Util.createFilePath(picsDir, "SenSpeVsInfAll.png")

        NGS_Util.executeCall(" ".join(["convert", epsFile, pngFile]))

    except Exception:
        # print_exc() returns None, so this also prints "None" (kept as-is).
        print(traceback.print_exc())

    return ""
    def rawIPRScan(self, organismName, org_ipr_split_dir):
        """Process every FASTA fragment and join the per-fragment outputs.

        Each fragment ``<organismName>_<i>`` (i = 1..``self.numberOfFragments``)
        under ``self.splitFasta.organismSplitDataDir`` is handed to
        ``self.raw_split_IPRScan``; afterwards all
        ``<organismName>_split_*`` files in ``self.orgIPRScanDir`` are
        concatenated with ``cat`` into ``<organismName>.ipr.raw``.

        org_ipr_split_dir -- accepted but not used by this implementation.

        Returns the path of the joined raw file, or "" if an exception was
        caught.
        """
        try:
            print("rawIPRScan: " + organismName)

            for fragmentNo in range(1, self.numberOfFragments + 1):
                fragmentLabel = str(fragmentNo)
                fragmentFile = NGS_Util.createFilePath(
                    self.splitFasta.organismSplitDataDir, organismName + "_" + fragmentLabel
                )
                self.raw_split_IPRScan(organismName, fragmentFile, fragmentLabel)

            splitGlob = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_split_*")
            joinedRawFile = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + ".ipr.raw")

            NGS_Util.executeCall("cat " + splitGlob + " > " + joinedRawFile)

            return joinedRawFile

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
Пример #9
0
    def makeBlastDB(self, organismName):
        """Build the organism's protein BLAST database if it is not there yet.

        Nothing is done (and "" is returned) when ``<organismName>.faa`` is
        missing from ``self.orgFastaDir``.  Otherwise the database is built
        through ``self.ngsBlast.makeProteinBlastDBFromDustFile`` unless a
        ``.phd`` or ``.psq`` file already exists for it.

        Returns the database path prefix under ``self.orgBlastDBDir``, or ""
        when the FASTA file is missing or an exception was caught.
        """
        try:
            print("Make Blast Database: " + organismName)

            fastaFile = NGS_Util.createFilePath(self.orgFastaDir, organismName + ".faa")
            dustFile = NGS_Util.createFilePath(self.orgBlastDustDir, organismName + "_dust.asnb")
            dbPrefix = NGS_Util.createFilePath(self.orgBlastDBDir, organismName)

            if not os.path.exists(fastaFile):
                return ""

            # Rebuild only when neither index file is present.
            alreadyBuilt = os.path.exists(dbPrefix + ".phd") or os.path.exists(dbPrefix + ".psq")
            if not alreadyBuilt:
                self.ngsBlast.makeProteinBlastDBFromDustFile(fastaFile, dustFile, dbPrefix)

            return dbPrefix

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def makeBlastDB(self, organismName):
        """Create the protein BLAST database for one organism when needed.

        Requires ``<organismName>.faa`` in ``self.orgFastaDir``; without it
        the method returns "".  The build itself
        (``self.ngsBlast.makeProteinBlastDBFromDustFile``) is skipped when a
        ``.phd`` or ``.psq`` file for the database already exists.

        Returns the database path prefix under ``self.orgBlastDBDir``, or ""
        when the FASTA file is missing or an exception was caught.
        """
        try:
            print("Make Blast Database: " + organismName)

            sourceFasta = NGS_Util.createFilePath(self.orgFastaDir, organismName+".faa")
            dustMask = NGS_Util.createFilePath(self.orgBlastDustDir, organismName+"_dust.asnb")
            blastDb = NGS_Util.createFilePath(self.orgBlastDBDir, organismName)

            if not os.path.exists(sourceFasta):
                return ""

            # Same test as "no .phd and no .psq", written with De Morgan.
            if not (os.path.exists(blastDb + ".phd") or os.path.exists(blastDb + ".psq")):
                self.ngsBlast.makeProteinBlastDBFromDustFile(sourceFasta, dustMask, blastDb)

            return blastDb

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def runClusterIPRScan(self, organismName):
        """Split an organism's FASTA file and run the ``qsub`` InterProScan array script.

        The ``.faa`` file is split into ``self.numberOfFragments`` parts by a
        fresh ``MetabolicReconstructionPipeline_SplitFasta`` instance, then
        ``qsub -t 1-<n>:1 <ScriptsDir.ClusterIprscan>`` is executed with the
        ``interproscan.sh`` path, the split-file prefix and the output prefix
        ``<organismName>_split`` under ``self.orgIPRScanDir``.

        Always returns ""; any exception is reported as a traceback.
        """
        try:
            splitter = MetabolicReconstructionPipeline_SplitFasta.MetabolicReconstructionPipeline_SplitFasta()

            # 1) split the organism FASTA into the configured number of fragments
            sourceFasta = NGS_Util.createFilePath(self.orgFastaDir, organismName+".faa")
            splitter.splitOrganismDataFile(organismName, sourceFasta, self.numberOfFragments)

            # 2) assemble and run the array-job command
            commandParts = [
                "qsub -t 1-" + str(self.numberOfFragments) + ":1 " + ScriptsDir.ClusterIprscan,
                NGS_Util.createFilePath(ScriptsDir.IprscanDir,"interproscan.sh "),
                splitter.organismSplitDataDir + organismName,
                NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_split" ),
            ]
            NGS_Util.executeCall(" ".join(commandParts))

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def run_Org_vs_Uniprot_ClusterBlast(self, organismName):
        """Split an organism's FASTA file and run the ``qsub`` BLAST array script against UniProt.

        The ``.faa`` file is split into ``self.numberOfFragments`` parts by a
        fresh ``MetabolicReconstructionPipeline_SplitFasta`` instance, then
        ``qsub -t 1-<n>:1 <ScriptsDir.ClusterBlast>`` is executed with the
        blastp path, ``self.uniprot_blast_db``, the split-file prefix, output
        format ``6``, the result prefix ``<organismName>-vs-up`` under
        ``self.orgBlastResDir`` and ``self.blastEValue``.

        Nothing is returned; any exception is reported as a traceback.
        """
        try:
            splitter = MetabolicReconstructionPipeline_SplitFasta.MetabolicReconstructionPipeline_SplitFasta()

            # 1) split the organism FASTA into the configured number of fragments
            sourceFasta = NGS_Util.createFilePath(self.orgFastaDir, organismName+".faa")
            splitter.splitOrganismDataFile(organismName, sourceFasta, self.numberOfFragments)

            # 2) assemble and run the array-job command
            commandParts = [
                "qsub -t 1-" + str(self.numberOfFragments) + ":1 " + ScriptsDir.ClusterBlast,
                NGS_Util.createFilePath(ScriptsDir.BlastDir,"blastp"),
                self.uniprot_blast_db,
                splitter.organismSplitDataDir + organismName,
                str(6),
                NGS_Util.createFilePath(self.orgBlastResDir, organismName + "-vs-up" ),
                str(self.blastEValue),
            ]
            NGS_Util.executeCall(" ".join(commandParts))

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())
    def getBlastScore(self, mode):
        """Produce the rectified joint BLAST file for every listed organism (cluster workflow).

        For each ``<id> <name>`` line of ``self.orgListFile`` whose
        ``<name>.joint.blast`` is not yet in ``self.jointBlastDir``:

        * if the joint file already exists in ``self.orgBlastResDir`` it is
          passed to ``self.rectifyBlast``;
        * otherwise ``mode == 1`` builds the BLAST database and runs both
          cluster BLAST steps, sleeping after each one, while ``mode == 2``
          concatenates the cluster results, combines them and rectifies the
          combined file.

        mode -- 1 to run the cluster BLAST steps, 2 to collect their results;
                any other value does nothing for organisms without a joint file.

        Always returns ""; any exception is reported as a traceback and
        stops the loop.
        """
        try:
            print("getBlastScores")

            # ``with`` closes the list file even when a step raises mid-loop.
            with open(self.orgListFile) as orgListFile_fh:

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()

                    orgJointBlast = NGS_Util.createFilePath(self.orgBlastResDir, organismName + ".joint.blast")
                    orgRectifyBlast = NGS_Util.createFilePath(self.jointBlastDir, organismName + ".joint.blast")

                    print("getBlastScore:" + organismName)

                    if os.path.exists(orgRectifyBlast):
                        continue  # already finished for this organism

                    if os.path.exists(orgJointBlast):
                        self.rectifyBlast(organismName, orgJointBlast)

                    elif mode == 1:
                        self.makeBlastDB(organismName)

                        self.run_Org_vs_Uniprot_ClusterBlast(organismName)
                        time.sleep(1800)  # wait 30 minutes (1800 s)

                        self.run_Uniprot_vs_Org_ClusterBlast(organismName)
                        time.sleep(2400)  # wait 40 minutes (2400 s)

                    elif mode == 2:
                        org_vs_UniprotBlastDB = self.concatenate_Org_vs_Uniprot_ClusterBlast_results(organismName)
                        Uniprot_vs_orgBlastDB = self.concatenate_Uniprot_vs_Org_ClusterBlast_results(organismName)

                        # "" marks a direction whose results are not available.
                        if org_vs_UniprotBlastDB != "" and Uniprot_vs_orgBlastDB != "":
                            orgJointBlast = self.combineBlast(organismName, org_vs_UniprotBlastDB, Uniprot_vs_orgBlastDB)

                            if orgJointBlast != "":
                                self.rectifyBlast(organismName, orgJointBlast)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def getIPRScanScore(self, mode):
        """Produce the final InterProScan files for every listed organism (cluster workflow).

        For each ``<id> <name>`` line of ``self.orgListFile`` whose
        ``<name>.faa.IPR.final.txt`` is missing from
        ``self.fungi_InterProScan_result``:

        * ``mode == 1`` calls ``self.runClusterIPRScan`` and then sleeps for
          six hours before moving on to the next organism;
        * ``mode == 2`` merges the cluster XML output, derives the raw,
          ipr2go, ipr2ec and seqid2ec files through the corresponding
          ``self.*`` helpers and copies the raw and seqid2ec files to
          ``<name>.faa.raw.txt`` / ``<name>.faa.IPR.final.txt``.

        mode -- 1 to run the cluster jobs, 2 to collect their results.

        Always returns ""; any exception is reported as a traceback and
        stops the loop.
        """
        try:
            print("getIPRScanScore")

            # ``with`` closes the list file even when a step raises mid-loop.
            with open(self.orgListFile) as orgListFile_fh:

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()

                    organism_IPR_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.IPR.final.txt")

                    if os.path.exists(organism_IPR_final):
                        continue  # already finished for this organism

                    print("getIPRScanScore : " + organismName)

                    if mode == 1:
                        self.runClusterIPRScan(organismName)
                        time.sleep(21600)  # sleep for 6 hrs

                    elif mode == 2:
                        ipr_xml_file = self.concatenate_ClusterIPRScan_results(organismName)

                        if ipr_xml_file == "":
                            # No merged XML was produced, so there is nothing
                            # to post-process for this organism yet.
                            continue

                        ipr_raw_file = self.xmlIPRScanToRAWOutput(organismName, ipr_xml_file)

                        organism_ipr2go = self.extract_ipr2go_based_on_xml(organismName, ipr_xml_file)
                        organism_ipr2ec = self.map_ipr_to_specific_ecs(organismName, organism_ipr2go)
                        organism_seqid2ec = self.combine_iprscan_raw_result_with_ipr2ec(organismName, organism_ipr2ec, ipr_raw_file)

                        if os.path.exists(ipr_raw_file) and os.path.exists(organism_seqid2ec):
                            organism_raw_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.raw.txt")

                            NGS_Util.copyFile(ipr_raw_file, organism_raw_final)
                            NGS_Util.copyFile(organism_seqid2ec, organism_IPR_final)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def combineBlast(self, organismName, org_vs_UniprotBlastDB, Uniprot_vs_orgBlastDB):
        """Combine both BLAST directions into ``<organismName>.joint.blast``.

        Opens the two BLAST result files and ``self.ec_files`` for reading and
        the joint file (under ``self.orgBlastResDir``) for writing, then hands
        the four handles to the module-level ``combineBlasts``.

        organismName          -- organism whose joint file is written.
        org_vs_UniprotBlastDB -- path of the organism-vs-UniProt BLAST output.
        Uniprot_vs_orgBlastDB -- path of the UniProt-vs-organism BLAST output.

        Returns the joint file path, or "" if an exception was caught.
        """
        try:
            print("Combine Blast: " + organismName)

            orgJointBlast = NGS_Util.createFilePath(self.orgBlastResDir, organismName + ".joint.blast")

            # Inputs are opened before the output (original order), so a
            # missing input fails before the joint file is created.  ``with``
            # closes every handle even when combineBlasts raises.
            with open(org_vs_UniprotBlastDB) as org_vs_UniprotBlastDB_fh, \
                    open(Uniprot_vs_orgBlastDB) as Uniprot_vs_orgBlastDB_fh, \
                    open(self.ec_files) as ec_files_fh, \
                    open(orgJointBlast, "w") as orgJointBlast_fh:

                combineBlasts(ec_files_fh, org_vs_UniprotBlastDB_fh, Uniprot_vs_orgBlastDB_fh, orgJointBlast_fh)

            return orgJointBlast

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #16
0
    def gettNormalizedBlastBitScoreMatrix(self, pfamID, bitScoreMatrix, blastResultFile):
        """Normalise a bit-score matrix and write it to ``<pfamID>_NormalizedBitScores.txt``.

        For every pair (a, b) present in ``bitScoreMatrix`` the normalised
        score is ``score(a, b) / (score(a, a) + score(b, b) - score(a, b))``.
        The result matrix is first created by
        ``self.createBlastBitScoreMatrix(blastResultFile)``, then filled in
        and written to ``self.pfamBlastStaticticsDir`` via
        ``self.writeMatrixToFile``.

        pfamID          -- prefix of the output file name.
        bitScoreMatrix  -- nested mapping ``matrix[a][b] -> bit score``.
        blastResultFile -- BLAST result used to create the output matrix.

        Returns the normalised matrix, or "" if an exception was caught (for
        example a missing self-hit or a zero denominator).
        """
        try:
            print("gettNormalizedBlastBitScoreMatrix")

            normalizedBitScoreMatrix = self.createBlastBitScoreMatrix(blastResultFile)

            for sequenceId, matchingScores in bitScoreMatrix.iteritems():

                for matchingSequenceID in matchingScores:

                    pairScore = matchingScores[matchingSequenceID]

                    # Named ``denominator`` so the builtin ``sum`` is not shadowed.
                    denominator = matchingScores[sequenceId] + bitScoreMatrix[matchingSequenceID][matchingSequenceID] - pairScore

                    # Force true division: with integer scores Python 2's "/"
                    # would truncate the ratio to 0 or 1.
                    normalizedBitScoreMatrix[sequenceId][matchingSequenceID] = float(pairScore) / float(denominator)

            normalizedBitScoreMatrixFile = NGS_Util.createFilePath(self.pfamBlastStaticticsDir, pfamID + "_NormalizedBitScores.txt")
            self.writeMatrixToFile(normalizedBitScoreMatrix, normalizedBitScoreMatrixFile)

            return normalizedBitScoreMatrix

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def combineBlast(self, organismName, org_vs_UniprotBlastDB, Uniprot_vs_orgBlastDB):
        """Write ``<organismName>.joint.blast`` from the two BLAST directions.

        The organism-vs-UniProt file, the UniProt-vs-organism file and
        ``self.ec_files`` are opened for reading, the joint file under
        ``self.orgBlastResDir`` for writing, and all four handles are passed
        to the module-level ``combineBlasts``.

        organismName          -- organism whose joint file is written.
        org_vs_UniprotBlastDB -- path of the organism-vs-UniProt BLAST output.
        Uniprot_vs_orgBlastDB -- path of the UniProt-vs-organism BLAST output.

        Returns the joint file path, or "" if an exception was caught.
        """
        try:
            print("Combine Blast: " + organismName)

            orgJointBlast = NGS_Util.createFilePath(self.orgBlastResDir, organismName + ".joint.blast")

            # The context managers release every handle on all exit paths;
            # previously a failing open() or combineBlasts() leaked the
            # handles opened before it.  The output is still opened last.
            with open(org_vs_UniprotBlastDB) as org_vs_UniprotBlastDB_fh, \
                    open(Uniprot_vs_orgBlastDB) as Uniprot_vs_orgBlastDB_fh, \
                    open(self.ec_files) as ec_files_fh, \
                    open(orgJointBlast, "w") as orgJointBlast_fh:

                combineBlasts(ec_files_fh, org_vs_UniprotBlastDB_fh, Uniprot_vs_orgBlastDB_fh, orgJointBlast_fh)

            return orgJointBlast

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def getBlastScore(self):
        """Produce the rectified joint BLAST file for every listed organism.

        For each ``<id> <name>`` line of ``self.orgListFile`` whose
        ``<name>.joint.blast`` is missing from ``self.jointBlastDir``, the
        BLAST database is built, both BLAST directions are run, and, when
        both produced a result, the outputs are combined and rectified.

        Always returns ""; any exception is reported as a traceback and
        stops the loop.
        """
        try:
            # ``with`` closes the list file even when a step raises mid-loop.
            with open(self.orgListFile) as orgListFile_fh:

                for line in orgListFile_fh:
                    organismNameID, organismName = line.strip().split()

                    orgRectifyBlast = NGS_Util.createFilePath(self.jointBlastDir, organismName + ".joint.blast")

                    if os.path.exists(orgRectifyBlast):
                        continue  # already finished for this organism

                    print("getBlastScore:" + organismName)

                    self.makeBlastDB(organismName)

                    org_vs_UniprotBlastDB = self.blast_org_vs_uniprot(organismName)
                    Uniprot_vs_orgBlastDB = self.blast_uniprot_vs_org(organismName)

                    # "" marks a direction that did not produce a result.
                    if org_vs_UniprotBlastDB != "" and Uniprot_vs_orgBlastDB != "":
                        orgJointBlast = self.combineBlast(organismName, org_vs_UniprotBlastDB, Uniprot_vs_orgBlastDB)

                        if orgJointBlast != "":
                            self.rectifyBlast(organismName, orgJointBlast)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #19
0
    def doMCLClustering(self, pfamID, abcFile, mclClusteringDir):
        """Run ``mcl`` on one Pfam family for nine inflation values.

        The ``.mci``/``.tab`` inputs come from ``self.makeClusterInputFiles``.
        ``mcl`` is then executed with ``-I`` = 1.2, 1.6, ..., 4.4, each run
        writing ``<pfamID>.mci.<I without the dot>`` (e.g. ``.mci.12``) into
        ``mclClusteringDir``.  Every command is printed before it is run.

        pfamID           -- family identifier used as the file-name prefix.
        abcFile          -- input passed on to ``makeClusterInputFiles``.
        mclClusteringDir -- directory for the input and output files.

        Nothing is returned; any exception is reported as a traceback.
        """
        try:
            mciFile, tabFile = self.makeClusterInputFiles(
                pfamID, abcFile, mclClusteringDir)

            # Step the inflation in integer tenths (12, 16, ..., 44) rather
            # than adding 0.4 to a float: the accumulated rounding error
            # (e.g. 3.1999999999999997) would otherwise end up in the file
            # name and the -I argument wherever str(float) is not rounded.
            for tenths in range(12, 48, 4):

                inflation = str(tenths / 10.0)

                output = NGS_Util.createFilePath(
                    mclClusteringDir,
                    pfamID + ".mci." + inflation.replace(".", ""))

                call = "mcl " + mciFile + " -I " + inflation + " -use-tab " + tabFile + " -o " + output

                print(call)

                NGS_Util.executeCall(call)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
    def concatenate_ClusterIPRScan_results(self, organismName):
        """Merge the per-fragment InterProScan XML files once all of them exist.

        Every ``<organismName>_split_<i>.xml`` for
        i = 1..``self.numberOfFragments`` must be present in
        ``self.orgIPRScanDir``; if so the merge is delegated to
        ``self.mergeXML``.

        Returns whatever ``self.mergeXML`` returns, or "" when a fragment is
        missing (a message is printed) or an exception was caught.
        """
        try:
            # all() stops at the first missing fragment, like the original break.
            everyFragmentPresent = all(
                os.path.exists(
                    NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_split_" +  str(fragmentNo) + ".xml")
                )
                for fragmentNo in range(1, self.numberOfFragments + 1)
            )

            if not everyFragmentPresent:
                print("Interpro incomplete for: " +  organismName)
                return ""

            return self.mergeXML(organismName)

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
Пример #21
0
    def makeClusterInputFiles(self, pfamID, abcFile, mclClusteringDir):
        """Create the ``.mci`` and ``.tab`` input files for ``mcl`` with ``mcxload``.

        pfamID           -- family identifier used as the file-name prefix.
        abcFile          -- file passed to ``mcxload -abc``.
        mclClusteringDir -- directory that receives both output files.

        Returns the tuple ``(mciFile, tabFile)``.  If an exception is caught
        its traceback is printed and the method returns None implicitly.
        """
        try:
            outputs = tuple(
                NGS_Util.createFilePath(mclClusteringDir, pfamID + extension)
                for extension in (".mci", ".tab")
            )
            mciFile, tabFile = outputs

            command = "".join([
                "mcxload -abc ", abcFile,
                " --stream-mirror --stream-neg-log10 -stream-tf 'ceil(200)' -o ", mciFile,
                "  -write-tab ", tabFile,
            ])
            NGS_Util.executeCall(command)

            return outputs

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())
Пример #22
0
    def getIPRScanScore(self):
        """Produce the final InterProScan files for every listed organism.

        Lines of ``self.orgListFile`` starting with ``#`` are skipped.  For
        each remaining ``<id> <name>`` line whose ``<name>.faa.IPR.final.txt``
        is missing from ``self.fungi_InterProScan_result``, the FASTA file is
        split, scanned and converted through the corresponding ``self.*``
        helpers, and the resulting raw and seqid2ec files are copied to
        ``<name>.faa.raw.txt`` / ``<name>.faa.IPR.final.txt``.

        Always returns ""; any exception is reported as a traceback and
        stops the loop.
        """
        try:
            # ``with`` closes the list file even when a step raises mid-loop.
            with open(self.orgListFile) as orgListFile_fh:

                for line in orgListFile_fh:
                    if line.startswith("#"):
                        continue  # commented-out organism

                    organismNameID, organismName = line.strip().split()

                    organism_IPR_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.IPR.final.txt")

                    if os.path.exists(organism_IPR_final):
                        continue  # already finished for this organism

                    print("getIPRScanScore : " + organismName)

                    org_ipr_split_dir = self.splitFiles(organismName)
                    ipr_raw_file = self.rawIPRScan(organismName, org_ipr_split_dir)
                    ipr_xml_file = self.rawIPRScanToXMlOutput(organismName, ipr_raw_file)

                    organism_ipr2go = self.extract_ipr2go_based_on_xml(organismName, ipr_xml_file)
                    organism_ipr2ec = self.map_ipr_to_specific_ecs(organismName, organism_ipr2go)
                    organism_seqid2ec = self.combine_iprscan_raw_result_with_ipr2ec(organismName, organism_ipr2ec, ipr_raw_file)

                    if os.path.exists(ipr_raw_file) and os.path.exists(organism_seqid2ec):
                        organism_raw_final = NGS_Util.createFilePath(self.fungi_InterProScan_result, organismName + ".faa.raw.txt")

                        NGS_Util.copyFile(ipr_raw_file, organism_raw_final)
                        NGS_Util.copyFile(organism_seqid2ec, organism_IPR_final)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def blast_uniprot_vs_org(self, organismName):
        """Run ``self.ngsBlast.blastP`` with ``self.uniprot_fasta`` against the organism's database.

        The call receives the organism's database path (under
        ``self.orgBlastDBDir``), ``self.uniprot_fasta``, the literal ``6``,
        the result path and the literal ``10``.
        NOTE(review): 6 and 10 look like the output format and e-value;
        confirm against ``blastP``.

        Returns the result path ``up-vs-<organismName>.blast`` under
        ``self.orgBlastResDir``, or "" if an exception was caught.
        """
        try:
            print("uniprot_vs_org_blast Blast: " + organismName)

            resultFile = NGS_Util.createFilePath(self.orgBlastResDir, "up-vs-" + organismName+ ".blast")
            organismDb = NGS_Util.createFilePath(self.orgBlastDBDir, organismName)

            self.ngsBlast.blastP(organismDb, self.uniprot_fasta, 6, resultFile, 10)

            return resultFile

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def blast_org_vs_nr40_blast_formatted_11(self, organismName):
        """Run ``self.ngsBlast.blastP`` with the organism's FASTA file against ``self.nrdb40_blast_db``.

        The call receives ``self.nrdb40_blast_db``, the organism's ``.faa``
        file, the literal ``11``, the result path and the literal ``10``.
        NOTE(review): 11 and 10 look like the output format and e-value;
        confirm against ``blastP``.

        Returns the result path ``<organismName>.nrdb40_v2.txt`` under
        ``self.orgGTGBlastResDir``, or "" if an exception was caught.
        """
        try:
            print("blast_org_vs_nr40_blast_formatted_11: " + organismName)

            resultFile = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40_v2.txt")
            queryFasta = NGS_Util.createFilePath(self.orgFastaDir, organismName + ".faa")

            self.ngsBlast.blastP(self.nrdb40_blast_db, queryFasta, 11, resultFile, 10)

            return resultFile

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def blast_org_vs_uniprot(self, organismName):
        """Run ``self.ngsBlast.blastP`` with the organism's FASTA file against ``self.uniprot_blast_db``.

        The call receives ``self.uniprot_blast_db``, the organism's ``.faa``
        file, the literal ``6``, the result path and the literal ``10``.
        NOTE(review): 6 and 10 look like the output format and e-value;
        confirm against ``blastP``.

        Returns the result path ``<organismName>-vs-up.blast`` under
        ``self.orgBlastResDir``, or "" if an exception was caught.
        """
        try:
            print("org_vs_uniprot_blast: " + organismName)

            resultFile = NGS_Util.createFilePath(self.orgBlastResDir, organismName+"-vs-up.blast")
            queryFasta = NGS_Util.createFilePath(self.orgFastaDir, organismName+".faa")

            self.ngsBlast.blastP(self.uniprot_blast_db, queryFasta, 6, resultFile, 10)

            return resultFile

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def blast_org_vs_uniprot(self, organismName):
        """BLAST one organism's protein FASTA file against ``self.uniprot_blast_db``.

        ``self.ngsBlast.blastP`` is called with the database, the organism's
        ``.faa`` file from ``self.orgFastaDir``, the literal ``6``, the
        result path and the literal ``10``.
        NOTE(review): 6 and 10 look like the output format and e-value;
        confirm against ``blastP``.

        Returns ``<organismName>-vs-up.blast`` under ``self.orgBlastResDir``,
        or "" if an exception was caught.
        """
        try:
            print("org_vs_uniprot_blast: " + organismName)

            fastaPath = NGS_Util.createFilePath(self.orgFastaDir, organismName+".faa")
            blastOutput = NGS_Util.createFilePath(self.orgBlastResDir, organismName+"-vs-up.blast")

            blastArguments = (self.uniprot_blast_db, fastaPath, 6, blastOutput, 10)
            self.ngsBlast.blastP(*blastArguments)

            return blastOutput

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def blast_uniprot_vs_org(self, organismName):
        """BLAST ``self.uniprot_fasta`` against one organism's BLAST database.

        ``self.ngsBlast.blastP`` is called with the organism's database path
        from ``self.orgBlastDBDir``, ``self.uniprot_fasta``, the literal
        ``6``, the result path and the literal ``10``.
        NOTE(review): 6 and 10 look like the output format and e-value;
        confirm against ``blastP``.

        Returns ``up-vs-<organismName>.blast`` under ``self.orgBlastResDir``,
        or "" if an exception was caught.
        """
        try:
            print("uniprot_vs_org_blast Blast: " + organismName)

            databasePath = NGS_Util.createFilePath(self.orgBlastDBDir, organismName)
            blastOutput = NGS_Util.createFilePath(self.orgBlastResDir, "up-vs-" + organismName+ ".blast")

            blastArguments = (databasePath, self.uniprot_fasta, 6, blastOutput, 10)
            self.ngsBlast.blastP(*blastArguments)

            return blastOutput

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())

        return ""
    def mergeXML(self,organismName):
        """Merge all ``<organismName>_split_*`` files into ``<organismName>.xml``.

        The split files in ``self.orgIPRScanDir`` are copied line by line
        into the merged file.  Lines starting with ``<?xml version=`` or
        ``<protein-matches`` are written only until the first content line
        has been seen, lines starting with ``</protein-matches>`` are
        dropped, and a single ``</protein-matches>`` is appended at the end.

        Returns the merged file's path, or "" if an exception was caught.
        """
        try:
            ipr_xml_file = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + ".xml")
            splitPattern = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_split_*")

            headerLines = ("<?xml version=", "<protein-matches")
            isFirst = True  # True until the first content line has been copied

            # ``with`` closes the output and every split file even when a
            # read or write fails part-way through.
            with open(ipr_xml_file, "w") as ipr_xml_file_fh:

                # glob order is arbitrary; sorting makes the output reproducible.
                for srcFile in sorted(glob.glob(splitPattern)):

                    with open(srcFile) as ipr_XML_split_fh:

                        for line in ipr_XML_split_fh:

                            if line.startswith(headerLines):
                                if isFirst:
                                    ipr_xml_file_fh.write(line)

                            elif not line.startswith("</protein-matches>"):
                                isFirst = False
                                ipr_xml_file_fh.write(line)

                ipr_xml_file_fh.write("</protein-matches>")

            return ipr_xml_file

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #29
0
    def initialize(self, seq_org_list, jointBlastDir, GTGFungiKNNDir, fungi_InterProScan_result, phylogeneticTreeFile, modelTrainingDir):
        """Store the input locations and create the model-training directory layout.

        The arguments are stored on ``self`` under the same names.  Six
        sub-directories of ``modelTrainingDir`` (``IPR_EC``, ``BlastPValues``,
        ``ECScores``, ``ProbabilityDensityScore``, ``Tree``, ``Model``) are
        derived and, together with ``modelTrainingDir`` itself, created via
        ``NGS_Util.createDirectory``.  If ``phylogeneticTreeFile`` exists it
        is copied to ``Tree/tree`` and ``self.phylogeneticTreeFile`` is
        pointed at that copy.  ``self.treeCPDS`` is set to ``Tree/tree.cpds``.

        Nothing is returned; any exception is reported as a traceback.
        """
        try:
            # --- inputs -----------------------------------------------------
            self.seq_org_list = seq_org_list
            self.jointBlastDir = jointBlastDir
            self.GTGFungiKNNDir = GTGFungiKNNDir
            self.fungi_InterProScan_result = fungi_InterProScan_result
            self.phylogeneticTreeFile = phylogeneticTreeFile

            # --- working directories ----------------------------------------
            self.modelTrainingDir = modelTrainingDir
            self.modelTraining_IPR_EC_Dir = NGS_Util.createDirectoryPath(
                self.modelTrainingDir, "IPR_EC")
            self.modelTrainingBlastPVDir = NGS_Util.createDirectoryPath(
                self.modelTrainingDir, "BlastPValues")
            self.modelTraining_EC_Scores_Dir = NGS_Util.createDirectoryPath(
                self.modelTrainingDir, "ECScores")
            self.modelTrainingProbabilityDensityScoreDir = NGS_Util.createDirectoryPath(
                self.modelTrainingDir, "ProbabilityDensityScore")
            self.modelTrainingTreeDir = NGS_Util.createDirectoryPath(
                self.modelTrainingDir, "Tree")
            self.modelTrainingModelDir = NGS_Util.createDirectoryPath(
                self.modelTrainingDir, "Model")

            # Parent first, then the sub-directories (original order).
            for directory in (
                self.modelTrainingDir,
                self.modelTraining_IPR_EC_Dir,
                self.modelTrainingBlastPVDir,
                self.modelTraining_EC_Scores_Dir,
                self.modelTrainingProbabilityDensityScoreDir,
                self.modelTrainingTreeDir,
                self.modelTrainingModelDir,
            ):
                NGS_Util.createDirectory(directory)

            # --- phylogenetic tree -------------------------------------------
            if os.path.exists(self.phylogeneticTreeFile):
                localTree = NGS_Util.createFilePath(self.modelTrainingTreeDir,"tree")
                NGS_Util.copyFile(self.phylogeneticTreeFile, localTree)
                self.phylogeneticTreeFile = localTree

            self.treeCPDS = NGS_Util.createFilePath(self.modelTrainingTreeDir,"tree.cpds")

        except Exception:
            # print_exc() returns None, so this also prints "None" (kept as-is).
            print(traceback.print_exc())
    def splitFiles(self, organismName):
        """Split an organism's FASTA file into fragments.

        Builds the "<organismName>.faa" path under ``self.orgFastaDir`` and
        hands it to ``self.splitFasta.splitOrganismDataFile`` together with
        ``self.numberOfFragments``.

        Returns ``self.splitFasta.organismSplitDataDir``.  This is returned
        even when the split raised; the exception is only reported with a
        traceback.
        """
        try:
            org_fasta = NGS_Util.createFilePath(self.orgFastaDir, organismName + ".faa")

            self.splitFasta.splitOrganismDataFile(organismName, org_fasta, self.numberOfFragments)

        except Exception:
            # print_exc() already writes the traceback; printing its None
            # return value only adds a stray "None" line.
            traceback.print_exc()

        return self.splitFasta.organismSplitDataDir
Пример #31
0
    def computeTreeProbabilityDensityScore(self):
        """Run the mutation-probability estimation script on the tree.

        Invokes ``ScriptsDir.ModelTrainingScripts_estimate_mutation_probability``
        through ``NGS_Util.executeCall`` with the IPR_EC directory, the
        phylogenetic tree file and the Tree directory as arguments, then sets
        ``self.treeCPDS`` to the "tree.cpds" path in the Tree directory.

        Any exception is reported with a traceback and swallowed; the method
        always returns ``None``.
        """
        try:
            print("computeTreeProbabilityDensityScore")

            call = " ".join([
                "python",
                ScriptsDir.ModelTrainingScripts_estimate_mutation_probability,
                self.modelTraining_IPR_EC_Dir,
                self.phylogeneticTreeFile,
                self.modelTrainingTreeDir,
            ])
            NGS_Util.executeCall(call)

            self.treeCPDS = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree.cpds")

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
Пример #32
0
    def rawIPRScan(self, organismName, org_ipr_split_dir):
        """Run InterProScan on the split files and merge the raw outputs.

        Calls ``self.raw_SingleRun_IPRScan`` and then concatenates, with
        ``cat``, every file matching "<organismName>_split_*" in
        ``self.orgIPRScanDir`` into "<organismName>.ipr.raw" in the same
        directory.

        Returns the path of the merged raw file, or "" if any step raised
        (the exception is reported with a traceback).
        """
        try:
            print("rawIPRScan: " + organismName)

            # The threaded variant (self.raw_threaded_IPRScan) is disabled;
            # the single-run variant is used instead.
            self.raw_SingleRun_IPRScan(organismName, org_ipr_split_dir)

            ipr_raw_file_split = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_split_*")
            ipr_raw_file = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + ".ipr.raw")

            # The glob pattern is expanded by the shell that runs the command.
            call = "cat " + ipr_raw_file_split + " > " + ipr_raw_file
            NGS_Util.executeCall(call)

            return ipr_raw_file

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #33
0
    def rawIPRScanToXMlOutput(self, organismName, ipr_raw_file):
        """Convert a raw InterProScan result file to XML.

        The output path is "<organismName>.xml" under ``self.orgIPRScanDir``;
        the conversion is delegated to ``self.ngsIPRScan.convert_raw_xml``.

        Returns the XML file path, or "" if the conversion raised (the
        exception is reported with a traceback).
        """
        try:
            print("rawIPRScanToXMlOutput: " + organismName)

            ipr_xml_file = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + ".xml")

            self.ngsIPRScan.convert_raw_xml(ipr_raw_file, ipr_xml_file)

            return ipr_xml_file

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def xmlIPRScanToRAWOutput(self, organismName, ipr_xml_file):
        """Convert an InterProScan XML result file to the raw format.

        The output path is "<organismName>.ipr.raw" under
        ``self.orgIPRScanDir``; the conversion is delegated to
        ``self.ngsIPRScan.convert_iprscan5_xml_raw``.

        Returns the raw file path, or "" if the conversion raised (the
        exception is reported with a traceback).
        """
        try:
            print("xmlIPRScanToRAWOutput: " + organismName)

            ipr_raw_file = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + ".ipr.raw")

            self.ngsIPRScan.convert_iprscan5_xml_raw(ipr_xml_file, ipr_raw_file)

            return ipr_raw_file

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def blast_org_vs_nr40_blast_formatted_6(self, organismName, org_vs_nr40BlastDB_f11):
        """Re-format an organism-vs-nr40 BLAST result with format code 6.

        The output path is "<organismName>.nrdb40_v2_6.txt" under
        ``self.orgGTGBlastResDir``; the work is delegated to
        ``self.ngsBlast.blastFormatter(input, 6, output)``.

        Returns the output path, or "" if formatting raised (the exception is
        reported with a traceback).
        """
        try:
            print("blast_org_vs_nr40_blast_formatted_6: " + organismName)

            org_vs_nr40BlastDB_f6 = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40_v2_6.txt")

            self.ngsBlast.blastFormatter(org_vs_nr40BlastDB_f11, 6, org_vs_nr40BlastDB_f6)

            return org_vs_nr40BlastDB_f6

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def rawIPRScanToXMlOutput(self, organismName, ipr_raw_file):
        """Convert a raw InterProScan result file to XML.

        The output path is "<organismName>.xml" under ``self.orgIPRScanDir``;
        the conversion is delegated to ``self.ngsIPRScan.convert_raw_xml``.

        Returns the XML file path, or "" if the conversion raised (the
        exception is reported with a traceback).
        """
        try:
            print("rawIPRScanToXMlOutput: " + organismName)

            ipr_xml_file = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + ".xml")

            self.ngsIPRScan.convert_raw_xml(ipr_raw_file, ipr_xml_file)

            return ipr_xml_file

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #37
0
    def create_new_seq_org_list(self, organismName, organismID):
        """Append an organism's sequence IDs to the sequence/organism list.

        ``self.seq_org_list`` is scanned first; if any line already contains
        ``organismID`` nothing is written.  Otherwise every FASTA header
        (line starting with ">") of "<organismName>.faa" under
        ``self.orgFastaDir`` contributes one "<sequence id>\\t<organismID>"
        line, appended to ``self.seq_org_list``.

        The sequence id is the second "|"-separated field of the header's
        first whitespace-delimited token when that token contains "|",
        otherwise the whole first token.

        Any exception is reported with a traceback and swallowed; the method
        always returns ``None``.
        """
        try:
            print("create_new_seq_org_list: " + organismName)

            # NOTE(review): this is a substring test, so an ID that is a
            # prefix of another ID also counts as "already listed" - confirm.
            with open(self.seq_org_list) as orgListFile_fh:
                found = any(organismID in line for line in orgListFile_fh)

            if not found:
                org_fasta = NGS_Util.createFilePath(self.orgFastaDir, organismName + ".faa")

                # The context managers guarantee both files are closed; the
                # original referenced ``fh.close`` without calling it.
                with open(org_fasta) as org_fasta_fh, open(self.seq_org_list, "a") as orgListFile_fh:
                    for line in org_fasta_fh:
                        if not line.startswith(">"):
                            continue

                        # split() (not split(" ")) so a header without any
                        # space does not drag its newline into the ID.
                        header = line.split()[0]

                        if "|" in header:
                            seqID = header.split("|")[1]
                        else:
                            # NOTE(review): the leading ">" is kept here, as
                            # in the original - confirm consumers expect it.
                            seqID = header

                        orgListFile_fh.write(seqID + "\t" + organismID + "\n")

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
    def reform_knn(self, organismName, org_gtg_knn):
        """Pipeline step (9): add organism and EC information to a KNN file.

        Runs ``ScriptsDir.GTGScripts_reform_knn`` through
        ``NGS_Util.executeCall`` with ``self.seq_org_list``,
        ``self.ec_files``, the input ``org_gtg_knn`` and the output path
        "<organismName>.gtg.knn" under ``self.GTGKNNDir``.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("reform_knn: " + organismName)

            org_gtg_knn_final = NGS_Util.createFilePath(self.GTGKNNDir, organismName + ".gtg.knn")

            call = " ".join([
                "python",
                ScriptsDir.GTGScripts_reform_knn,
                self.seq_org_list,
                self.ec_files,
                org_gtg_knn,
                org_gtg_knn_final,
            ])
            NGS_Util.executeCall(call)

            return org_gtg_knn_final

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def rectifyBlast(self, organismName, orgJointBlast):
        """Run the BLAST-result rectification script for one organism.

        Runs ``ScriptsDir.BlastScripts_rectify_blastresult`` through
        ``NGS_Util.executeCall`` with ``orgJointBlast`` as input and
        "<organismName>.joint.blast" under ``self.jointBlastDir`` as output.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("Rectify Blast: " + organismName)

            orgRectifyBlast = NGS_Util.createFilePath(self.jointBlastDir, organismName + ".joint.blast")

            call = " ".join([
                "python",
                ScriptsDir.BlastScripts_rectify_blastresult,
                orgJointBlast,
                orgRectifyBlast,
            ])
            NGS_Util.executeCall(call)

            return orgRectifyBlast

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #40
0
    def extract_ipr2go_based_on_xml(self, organismName, ipr_xml_file):
        """Run the ipr2go extraction script on an InterProScan XML file.

        Runs ``ScriptsDir.IPRScanScripts_ipr2go`` through
        ``NGS_Util.executeCall`` with ``ipr_xml_file`` as input and
        "<organismName>_ipr2go.txt" under ``self.orgIPRScanDir`` as output.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("extract_ipr2go_based_on_xml: " + organismName)

            organism_ipr2go = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_ipr2go.txt")

            call = " ".join([
                "python",
                ScriptsDir.IPRScanScripts_ipr2go,
                ipr_xml_file,
                organism_ipr2go,
            ])
            NGS_Util.executeCall(call)

            return organism_ipr2go

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def map_ipr_to_specific_ecs(self, organismName, organism_ipr2go):
        """Run the InterPro-to-EC mapping script for one organism.

        Runs ``ScriptsDir.IPRScanScripts_get_interpro_ecs`` through
        ``NGS_Util.executeCall`` with ``self.ec2go``, the input
        ``organism_ipr2go`` and the output path "<organismName>_ipr2ec.txt"
        under ``self.orgIPRScanDir``.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("map_ipr_to_specific_ecs: " + organismName)

            organism_ipr2ec = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_ipr2ec.txt")

            call = " ".join([
                "python",
                ScriptsDir.IPRScanScripts_get_interpro_ecs,
                self.ec2go,
                organism_ipr2go,
                organism_ipr2ec,
            ])
            NGS_Util.executeCall(call)

            return organism_ipr2ec

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def rectifyBlast(self, organismName, orgJointBlast):
        """Run the BLAST-result rectification script for one organism.

        Runs ``ScriptsDir.BlastScripts_rectify_blastresult`` through
        ``NGS_Util.executeCall`` with ``orgJointBlast`` as input and
        "<organismName>.joint.blast" under ``self.jointBlastDir`` as output.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("Rectify Blast: " + organismName)

            orgRectifyBlast = NGS_Util.createFilePath(self.jointBlastDir, organismName + ".joint.blast")

            call = " ".join([
                "python",
                ScriptsDir.BlastScripts_rectify_blastresult,
                orgJointBlast,
                orgRectifyBlast,
            ])
            NGS_Util.executeCall(call)

            return orgRectifyBlast

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #43
0
    def raw_split_IPRScan(self, organismName, organismSplitFile, splitNameIndex):
        """Run InterProScan on one split file unless its output already exists.

        The output path is "<organismName>_split_<splitNameIndex>.ipr.raw"
        under ``self.orgIPRScanDir``.  When that file is missing,
        ``self.ngsIPRScan.protein_iprscan_to_raw_output`` is called to
        produce it; an existing file is reused as is.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback and an error line).
        """
        try:
            print("raw_split_IPRScan: " + organismName + " " + organismSplitFile)

            ipr_raw_file = NGS_Util.createFilePath(
                self.orgIPRScanDir,
                organismName + "_split_" + str(splitNameIndex) + ".ipr.raw",
            )

            # Skip the scan when a previous run already produced the output.
            if not os.path.exists(ipr_raw_file):
                self.ngsIPRScan.protein_iprscan_to_raw_output(organismSplitFile, ipr_raw_file)

            return ipr_raw_file

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()
            print("error raw_split_IPRScan: " + organismName + " " + organismSplitFile)

        return ""
    def extract_start_len_fmt11(self, organismName, org_vs_nr40BlastDB_f11):
        """Pipeline step (3): extract start, length and subject name from fmt11.

        Runs ``ScriptsDir.GTGScripts_extract_start_len_fmt11`` through
        ``NGS_Util.executeCall`` with ``org_vs_nr40BlastDB_f11`` as input and
        "<organismName>.nrdb40_v2.part2" under ``self.orgGTGBlastResDir`` as
        output.

        Returns the output (".part2") path, or "" if any step raised (the
        exception is reported with a traceback).
        """
        try:
            print("extract_start_len_fmt11: " + organismName)

            org_vs_nr40BlastDB_f11_part2 = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40_v2.part2")

            call = " ".join([
                "python",
                ScriptsDir.GTGScripts_extract_start_len_fmt11,
                org_vs_nr40BlastDB_f11,
                org_vs_nr40BlastDB_f11_part2,
            ])
            NGS_Util.executeCall(call)

            return org_vs_nr40BlastDB_f11_part2

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def extract_combine_seq_start_len_fmt11(self, organismName, org_vs_nr40BlastDB_f11_part1, org_vs_nr40BlastDB_f11_part2):
        """Pipeline step (4): combine the ".part1" and ".part2" extractions.

        Runs ``ScriptsDir.GTGScripts_extract_combine_seq_start_len_fmt11``
        through ``NGS_Util.executeCall`` with the two part files as input and
        "<organismName>.nrdb40_v2.part1.part2.result" under
        ``self.orgGTGBlastResDir`` as output.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("extract_combine_seq_start_len_fmt11: " + organismName)

            org_vs_nr40BlastDB_f11_part1_part2_result = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40_v2.part1.part2.result")

            call = " ".join([
                "python",
                ScriptsDir.GTGScripts_extract_combine_seq_start_len_fmt11,
                org_vs_nr40BlastDB_f11_part1,
                org_vs_nr40BlastDB_f11_part2,
                org_vs_nr40BlastDB_f11_part1_part2_result,
            ])
            NGS_Util.executeCall(call)

            return org_vs_nr40BlastDB_f11_part1_part2_result

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #46
0
    def map_ipr_to_specific_ecs(self, organismName, organism_ipr2go):
        """Run the InterPro-to-EC mapping script for one organism.

        Runs ``ScriptsDir.IPRScanScripts_get_interpro_ecs`` through
        ``NGS_Util.executeCall`` with ``self.ec2go``, the input
        ``organism_ipr2go`` and the output path "<organismName>_ipr2ec.txt"
        under ``self.orgIPRScanDir``.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("map_ipr_to_specific_ecs: " + organismName)

            organism_ipr2ec = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_ipr2ec.txt")

            call = " ".join([
                "python",
                ScriptsDir.IPRScanScripts_get_interpro_ecs,
                self.ec2go,
                organism_ipr2go,
                organism_ipr2ec,
            ])
            NGS_Util.executeCall(call)

            return organism_ipr2ec

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def getGTGScore(self):
        """Run the GTG scoring pipeline for every organism in the list file.

        ``self.orgListFile`` is read line by line; lines starting with "#"
        and blank lines are skipped.  Each remaining line must hold two
        whitespace-separated fields: the organism name ID and the organism
        name.  An organism whose "<organismNameID>.gtg.knn" file already
        exists under ``self.GTGKNNDir`` is skipped; otherwise the BLAST
        formatting, extraction, reform, best-hit, GTG, KNN and reform-KNN
        steps are run in order, each feeding its result to the next.

        Always returns "".  Any exception aborts the remaining organisms and
        is reported with a traceback.
        """
        try:
            # ``with`` closes the list file even when a pipeline step raises.
            with open(self.orgListFile) as orgListFile_fh:
                for line in orgListFile_fh:
                    # Blank lines would otherwise fail the two-field unpack
                    # below and abort every remaining organism.
                    if line.startswith("#") or not line.strip():
                        continue

                    organismNameID, organismName = line.strip().split()

                    org_gtg_knn_final = NGS_Util.createFilePath(self.GTGKNNDir, organismNameID + ".gtg.knn")

                    if os.path.exists(org_gtg_knn_final):
                        # Result of a previous run; nothing to recompute.
                        continue

                    print("getGTGScore : " + organismName)

                    # NOTE(review): the BLAST-level steps are keyed by
                    # organismName and the later ones by organismNameID, as
                    # in the original - confirm this mix is intended.
                    org_vs_nr40BlastDB_f11 = self.blast_org_vs_nr40_blast_formatted_11(organismName)
                    org_vs_nr40BlastDB_f6 = self.blast_org_vs_nr40_blast_formatted_6(organismName, org_vs_nr40BlastDB_f11)

                    org_vs_nr40BlastDB_f11_part1 = self.extract_seq_fmt11(organismName, org_vs_nr40BlastDB_f11)
                    org_vs_nr40BlastDB_f11_part2 = self.extract_start_len_fmt11(organismName, org_vs_nr40BlastDB_f11)
                    org_vs_nr40BlastDB_f11_part1_part2_result = self.extract_combine_seq_start_len_fmt11(organismName, org_vs_nr40BlastDB_f11_part1, org_vs_nr40BlastDB_f11_part2)

                    org_vs_nr40BlastDB_result_final = self.reform(organismName, org_vs_nr40BlastDB_f6, org_vs_nr40BlastDB_f11_part1_part2_result)

                    org_vs_nr40BlastDB_best_hit = self.extract_best_hit(organismNameID, org_vs_nr40BlastDB_result_final)

                    org_gtg = self.extract_gtg(organismNameID, org_vs_nr40BlastDB_best_hit)

                    org_gtg_knn = self.gtgknn(organismNameID, org_gtg, self.numberNearestHits)

                    # The returned path is not needed here.
                    self.reform_knn(organismNameID, org_gtg_knn)

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #48
0
    def getBlastBitScoreMatrix(self, pfamID, blastResultFile):
        """Fill a bit-score matrix from a tab-separated BLAST result file.

        The matrix comes from ``self.createBlastBitScoreMatrix`` and is
        indexed as ``matrix[sequenceID][matchingSequenceID]``.  For every
        data row, column 0 is the sequence ID, column 1 the matching
        sequence ID and column 11 the bit score; the matrix keeps the
        highest bit score seen for each pair.  The result is also written,
        via ``self.writeMatrixToFile``, to
        "<pfamID>_AllvsALL_BlastBitScores.txt" under
        ``self.pfamBlastStaticticsDir``.

        Returns the filled matrix, or "" when ``blastResultFile`` does not
        exist or any step raised (the exception is reported with a
        traceback).
        """
        try:
            print("getBlastBitScoreMatrix")

            bitScoreMatrix = self.createBlastBitScoreMatrix(blastResultFile)

            if os.path.exists(blastResultFile):

                # ``with`` closes the file even when a row fails to parse.
                with open(blastResultFile) as blastResultFile_fh:
                    for line in blastResultFile_fh:
                        blastOutput = line.strip().split("\t")

                        # Column 11 is read below, so a usable row needs at
                        # least 12 fields; shorter rows are skipped instead
                        # of aborting the whole parse with an IndexError.
                        if len(blastOutput) > 11:
                            sequenceID = blastOutput[0]
                            matchingSequenceID = blastOutput[1]
                            bitScore = float(blastOutput[11])

                            if bitScoreMatrix[sequenceID][matchingSequenceID] < bitScore:
                                bitScoreMatrix[sequenceID][matchingSequenceID] = bitScore

                bitScoreMatrixFile = NGS_Util.createFilePath(self.pfamBlastStaticticsDir, pfamID + "_AllvsALL_BlastBitScores.txt")

                self.writeMatrixToFile(bitScoreMatrix, bitScoreMatrixFile)

                return bitScoreMatrix

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #49
0
    def combine_iprscan_raw_result_with_ipr2ec(self, organismName, organism_ipr2ec, ipr_raw_file):
        """Combine the raw InterProScan result with the IPR-to-EC mapping.

        Runs ``ScriptsDir.IPRScanScripts_combineIPRwithECs`` through
        ``NGS_Util.executeCall`` with ``organism_ipr2ec``, ``ipr_raw_file``,
        ``self.seq_org_list`` and the output path
        "<organismName>_seqid2ec.txt" under ``self.orgIPRScanDir``.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        # Original author's note: "to be changes new_seq_org_list -> seq_org_list".
        try:
            print("combine_iprscan_raw_result_with_ipr2ec: " + organismName)

            organism_seqid2ec = NGS_Util.createFilePath(self.orgIPRScanDir, organismName + "_seqid2ec.txt")

            call = " ".join([
                "python",
                ScriptsDir.IPRScanScripts_combineIPRwithECs,
                organism_ipr2ec,
                ipr_raw_file,
                self.seq_org_list,
                organism_seqid2ec,
            ])
            NGS_Util.executeCall(call)

            return organism_seqid2ec

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def extract_seq_fmt11(self, organismName, org_vs_nr40BlastDB_f11):
        """Pipeline step (2): extract query information from BLAST fmt11.

        Runs ``ScriptsDir.GTGScripts_extract_seq_fmt11`` through
        ``NGS_Util.executeCall`` with ``org_vs_nr40BlastDB_f11`` as input and
        "<organismName>.nrdb40_v2.part1" under ``self.orgGTGBlastResDir`` as
        output.

        Returns the output (".part1") path, or "" if any step raised (the
        exception is reported with a traceback).
        """
        try:
            print("extract_seq_fmt11: " + organismName)

            org_vs_nr40BlastDB_f11_part1 = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40_v2.part1")

            call = " ".join([
                "python",
                ScriptsDir.GTGScripts_extract_seq_fmt11,
                org_vs_nr40BlastDB_f11,
                org_vs_nr40BlastDB_f11_part1,
            ])
            NGS_Util.executeCall(call)

            return org_vs_nr40BlastDB_f11_part1

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def extract_best_hit(self, organismName, org_vs_nr40BlastDB_result_final):
        """Pipeline step (6): extract the best hit for each query sequence.

        Runs ``ScriptsDir.GTGScripts_extract_best_hit`` through
        ``NGS_Util.executeCall`` with ``org_vs_nr40BlastDB_result_final`` as
        input and "<organismName>.nrdb40.best_hit" under
        ``self.GTGBestHitsDir`` as output.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("extract_best_hit: " + organismName)

            org_vs_nr40BlastDB_best_hit = NGS_Util.createFilePath(self.GTGBestHitsDir, organismName + ".nrdb40.best_hit")

            call = " ".join([
                "python",
                ScriptsDir.GTGScripts_extract_best_hit,
                org_vs_nr40BlastDB_result_final,
                org_vs_nr40BlastDB_best_hit,
            ])
            NGS_Util.executeCall(call)

            return org_vs_nr40BlastDB_best_hit

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
    def reform(self, organismName, org_vs_nr40BlastDB_f6, org_vs_nr40BlastDB_f11_part1_part2_result):
        """Pipeline step (5): reform the format-6 and combined fmt11 results.

        Runs ``ScriptsDir.GTGScripts_reform`` through
        ``NGS_Util.executeCall`` with ``org_vs_nr40BlastDB_f6`` and
        ``org_vs_nr40BlastDB_f11_part1_part2_result`` as inputs and
        "<organismName>.nrdb40.result.final" under ``self.orgGTGBlastResDir``
        as output.

        Returns the output path, or "" if any step raised (the exception is
        reported with a traceback).
        """
        try:
            print("reform: " + organismName)

            org_vs_nr40BlastDB_result_final = NGS_Util.createFilePath(self.orgGTGBlastResDir, organismName + ".nrdb40.result.final")

            call = " ".join([
                "python",
                ScriptsDir.GTGScripts_reform,
                org_vs_nr40BlastDB_f6,
                org_vs_nr40BlastDB_f11_part1_part2_result,
                org_vs_nr40BlastDB_result_final,
            ])
            NGS_Util.executeCall(call)

            return org_vs_nr40BlastDB_result_final

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""
Пример #53
0
    def splitFiles(self, organismName):
        """Split an organism's FASTA file with the fsplit shell script.

        Runs ``ScriptsDir.IPRScanScripts_fsplit`` with ``sh`` through
        ``NGS_Util.executeCall``, passing "<organismName>.faa" under
        ``self.orgFastaDir`` and the directory path built from
        ``self.orgIPRScanDir`` and ``organismName``.

        Returns that directory path, or "" if any step raised (the exception
        is reported with a traceback).
        """
        try:
            print("splitFiles: " + organismName)

            org_fasta = NGS_Util.createFilePath(self.orgFastaDir, organismName + ".faa")
            org_ipr_split_dir = NGS_Util.createDirectoryPath(self.orgIPRScanDir, organismName)

            call = " ".join([
                "sh",
                ScriptsDir.IPRScanScripts_fsplit,
                org_fasta,
                org_ipr_split_dir,
            ])
            NGS_Util.executeCall(call)

            return org_ipr_split_dir

        except Exception:
            # print_exc() writes the traceback itself and returns None.
            traceback.print_exc()

        return ""