def copySequenceFiles(self, srcDataDir):
    """Copy each listed organism's ``.faa`` file from srcDataDir into self.orgFastaDir.

    self.orgListFile is read line by line. Every non-blank line must split on
    whitespace into exactly two fields: an organism ID and an organism name.
    ``<organismName>.faa`` is copied only when a file of that name is not
    already present in self.orgFastaDir.

    Args:
        srcDataDir: directory the ``.faa`` files are copied from.

    Returns:
        None. Exceptions are not propagated; they are reported with
        traceback.print_exc() and processing stops at the failing line.
    """
    try:
        print("Copy Fasta Files from %s to %s" % (srcDataDir, self.orgFastaDir))

        # The context manager closes the list file even if a copy step raises.
        with open(self.orgListFile) as orgListFile_fh:
            for line in orgListFile_fh:
                fields = line.split()
                if not fields:
                    # Blank lines (e.g. a trailing newline) used to abort the
                    # whole run with an unpacking error; skip them instead.
                    continue

                # Still raises ValueError for malformed (non two-field) lines.
                organismNameID, organismName = fields
                fastaName = organismName + ".faa"

                if os.path.exists(NGS_Util.createFilePath(self.orgFastaDir, fastaName)):
                    print("\tDoing nothing (files already copied) for %s" % (organismName))
                    continue

                orgFasta = NGS_Util.createFilePath(srcDataDir, fastaName)
                NGS_Util.copyFile(orgFasta, self.orgFastaDir)
                print("Copied fasta file for %s" % (organismName))

    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print (that emitted a stray "None" line).
        traceback.print_exc()
def getIPRScanScore(self, mode):
    """Run one stage of the cluster InterProScan workflow for every listed organism.

    self.orgListFile is read line by line; every non-blank line must split on
    whitespace into exactly two fields (organism ID, organism name). Organisms
    whose ``<name>.faa.IPR.final.txt`` already exists under
    self.fungi_InterProScan_result are skipped.

    Args:
        mode: 1 calls self.runClusterIPRScan() for the organism and then
            sleeps for 21600 seconds; 2 concatenates the cluster results,
            converts them to raw output, derives the ipr2go / ipr2ec /
            seqid2ec files and, when both the raw file and the seqid2ec file
            exist, copies them to ``<name>.faa.raw.txt`` and
            ``<name>.faa.IPR.final.txt``. Any other value only prints the
            progress line for each pending organism.

    Returns:
        Always the empty string. Exceptions are not propagated; they are
        reported with traceback.print_exc() and processing stops at the
        failing organism.
    """
    try:
        print("getIPRScanScore")

        # The context manager closes the list file even if a pipeline step raises.
        with open(self.orgListFile) as orgListFile_fh:
            for line in orgListFile_fh:
                fields = line.split()
                if not fields:
                    # Blank lines used to abort the run with an unpacking error.
                    continue

                # Still raises ValueError for malformed (non two-field) lines.
                organismNameID, organismName = fields

                organism_IPR_final = NGS_Util.createFilePath(
                    self.fungi_InterProScan_result, organismName + ".faa.IPR.final.txt")
                if os.path.exists(organism_IPR_final):
                    continue

                print("getIPRScanScore : " + organismName)

                if mode == 1:
                    self.runClusterIPRScan(organismName)
                    time.sleep(21600)  # sleep for 6 hrs

                elif mode == 2:
                    ipr_xml_file = self.concatenate_ClusterIPRScan_results(organismName)
                    ipr_raw_file = self.xmlIPRScanToRAWOutput(organismName, ipr_xml_file)
                    organism_ipr2go = self.extract_ipr2go_based_on_xml(organismName, ipr_xml_file)
                    organism_ipr2ec = self.map_ipr_to_specific_ecs(organismName, organism_ipr2go)
                    organism_seqid2ec = self.combine_iprscan_raw_result_with_ipr2ec(
                        organismName, organism_ipr2ec, ipr_raw_file)

                    if os.path.exists(ipr_raw_file) and os.path.exists(organism_seqid2ec):
                        organism_raw_final = NGS_Util.createFilePath(
                            self.fungi_InterProScan_result, organismName + ".faa.raw.txt")
                        # organism_IPR_final computed above is the same path the
                        # original rebuilt here, so it is reused.
                        NGS_Util.copyFile(ipr_raw_file, organism_raw_final)
                        NGS_Util.copyFile(organism_seqid2ec, organism_IPR_final)

    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print (that emitted a stray "None" line).
        traceback.print_exc()

    return ""
def getIPRScanScore(self):
    """Run the local InterProScan workflow for every listed organism.

    self.orgListFile is read line by line. Lines starting with ``#`` and blank
    lines are skipped; every other line must split on whitespace into exactly
    two fields (organism ID, organism name). Organisms whose
    ``<name>.faa.IPR.final.txt`` already exists under
    self.fungi_InterProScan_result are skipped.

    For each pending organism the sequence file is split, the raw scan is run
    on the split directory, the raw output is converted to XML, and the
    ipr2go / ipr2ec / seqid2ec files are derived. When both the raw file and
    the seqid2ec file exist they are copied to ``<name>.faa.raw.txt`` and
    ``<name>.faa.IPR.final.txt``.

    Returns:
        Always the empty string. Exceptions are not propagated; they are
        reported with traceback.print_exc() and processing stops at the
        failing organism.
    """
    try:
        # The context manager closes the list file even if a pipeline step raises.
        with open(self.orgListFile) as orgListFile_fh:
            for line in orgListFile_fh:
                if line.startswith("#"):
                    continue

                fields = line.split()
                if not fields:
                    # Blank lines used to abort the run with an unpacking error.
                    continue

                # Still raises ValueError for malformed (non two-field) lines.
                organismNameID, organismName = fields

                organism_IPR_final = NGS_Util.createFilePath(
                    self.fungi_InterProScan_result, organismName + ".faa.IPR.final.txt")

                # self.create_new_seq_org_list(organismName,organismNameID)

                if os.path.exists(organism_IPR_final):
                    continue

                print("getIPRScanScore : " + organismName)

                org_ipr_split_dir = self.splitFiles(organismName)
                ipr_raw_file = self.rawIPRScan(organismName, org_ipr_split_dir)
                ipr_xml_file = self.rawIPRScanToXMlOutput(organismName, ipr_raw_file)
                organism_ipr2go = self.extract_ipr2go_based_on_xml(organismName, ipr_xml_file)
                organism_ipr2ec = self.map_ipr_to_specific_ecs(organismName, organism_ipr2go)
                organism_seqid2ec = self.combine_iprscan_raw_result_with_ipr2ec(
                    organismName, organism_ipr2ec, ipr_raw_file)

                if os.path.exists(ipr_raw_file) and os.path.exists(organism_seqid2ec):
                    organism_raw_final = NGS_Util.createFilePath(
                        self.fungi_InterProScan_result, organismName + ".faa.raw.txt")
                    # organism_IPR_final computed above is the same path the
                    # original rebuilt here, so it is reused.
                    NGS_Util.copyFile(ipr_raw_file, organism_raw_final)
                    NGS_Util.copyFile(organism_seqid2ec, organism_IPR_final)

    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print (that emitted a stray "None" line).
        traceback.print_exc()

    return ""
def initialize(self, seq_org_list, jointBlastDir, GTGFungiKNNDir, fungi_InterProScan_result, phylogeneticTreeFile, modelTrainingDir):
    """Store the input locations and set up the model-training directory layout.

    The six arguments are stored on the instance under the same names. Paths
    for the sub-directories ``IPR_EC``, ``BlastPValues``, ``ECScores``,
    ``ProbabilityDensityScore``, ``Tree`` and ``Model`` are derived from
    modelTrainingDir with NGS_Util.createDirectoryPath(), and
    NGS_Util.createDirectory() is then called for modelTrainingDir and each
    sub-directory, in that order.

    If phylogeneticTreeFile exists it is copied to ``tree`` inside the Tree
    sub-directory, self.phylogeneticTreeFile is re-pointed at that copy and
    self.treeCPDS is set to ``tree.cpds`` in the same directory.

    Returns:
        None. Exceptions are not propagated; they are reported with
        traceback.print_exc(), which can leave the instance only partially
        initialised.
    """
    try:
        self.seq_org_list = seq_org_list
        self.jointBlastDir = jointBlastDir
        self.GTGFungiKNNDir = GTGFungiKNNDir
        self.fungi_InterProScan_result = fungi_InterProScan_result
        self.phylogeneticTreeFile = phylogeneticTreeFile
        self.modelTrainingDir = modelTrainingDir

        self.modelTraining_IPR_EC_Dir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "IPR_EC")
        self.modelTrainingBlastPVDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "BlastPValues")
        self.modelTraining_EC_Scores_Dir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "ECScores")
        self.modelTrainingProbabilityDensityScoreDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "ProbabilityDensityScore")
        self.modelTrainingTreeDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "Tree")
        self.modelTrainingModelDir = NGS_Util.createDirectoryPath(self.modelTrainingDir, "Model")

        # Parent first, then the sub-directories, in the original order.
        for directory in (
            self.modelTrainingDir,
            self.modelTraining_IPR_EC_Dir,
            self.modelTrainingBlastPVDir,
            self.modelTraining_EC_Scores_Dir,
            self.modelTrainingProbabilityDensityScoreDir,
            self.modelTrainingTreeDir,
            self.modelTrainingModelDir,
        ):
            NGS_Util.createDirectory(directory)

        # NOTE(review): the original indentation of this section was lost;
        # all three statements are assumed to belong to the existence check.
        # Confirm against the upstream file.
        if os.path.exists(self.phylogeneticTreeFile):
            localTreeFile = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree")
            NGS_Util.copyFile(self.phylogeneticTreeFile, localTreeFile)
            self.phylogeneticTreeFile = localTreeFile
            self.treeCPDS = NGS_Util.createFilePath(self.modelTrainingTreeDir, "tree.cpds")

    except Exception:
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print (that emitted a stray "None" line).
        traceback.print_exc()