        for fastaFileName in args:
            aligner.align(fastaFileName)

    elif options._sample:
        try:
            plugin = findPlugin(options.assignment, 'sap.assignment')
        except PluginNotFoundError, X:
            raise AnalysisTerminated(1, "The plugin or file %s was not found." % X.plugin)
        assignment = plugin.Assignment(options)
        for alignmentFileName in args:
            try:
                assignment.run(alignmentFileName)
            except plugin.AssignmentError, X:
                print X.msg

    elif options._stats:
        treeStatistics = TreeStatistics(options)
        treeStatistics.runTreeStatistics(args, generateSummary=False)

    # #######################################
    # if options.ghostpopulation:
    #     ima = IMa.Assignment(options)
    #     ima.run(args)
    # #######################################

    else:
        # Check that netblast and clustalw2 are installed:
        from UtilityFunctions import findOnSystem
        missing = False
        if os.name in ('nt', 'dos'):
            name = 'blastn.exe'
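# A minimal sketch of the plugin dispatch used above, assuming findPlugin
# resolves plugin names against an entry-point group and raises
# PluginNotFoundError otherwise; the plugin name and file name below are
# illustrative, not taken from this file:
#
#     plugin = findPlugin('ConstrainedNJ', 'sap.assignment')
#     assignment = plugin.Assignment(options)
#     assignment.run('alignment.fasta')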
def _resultProducer(self, jobID, abortEvent, inputFiles):
    try:
        sys.stdout = OutputEnqueue()
        global optionParser

        # Make directories and write fixed input files:
        init = Initialize(optionParser.options)
        init.createDirs()
        inputFiles, seqCount, sequenceNameMap = init.fixAndMoveInput(inputFiles)
        init.checkCacheConsistency(inputFiles)

        fastaFileBaseNames = []

        try:
            alignmentPlugin = findPlugin(optionParser.options.alignment, 'SAP.alignment')
        except PluginNotFoundError:
            exec("from SAP.Alignment import %s as alignmentPlugin" % optionParser.options.alignment)
        aligner = alignmentPlugin.Aligner(optionParser.options)

        try:
            assignmentPlugin = findPlugin(optionParser.options.assignment, 'SAP.assignment')
        except PluginNotFoundError:
            exec("from SAP.Assignment import %s as assignmentPlugin" % optionParser.options.assignment)
        assignment = assignmentPlugin.Assignment(optionParser.options)

        uniqueDict = {}
        copyLaterDict = {}
        homolcompiler = HomolCompiler(optionParser.options)
        inputQueryNames = {}

        # For each fasta file, execute the pipeline:
        for fastaFileName in inputFiles:
            fastaFile = open(fastaFileName, 'r')
            fastaIterator = Fasta.Iterator(fastaFile, parser=Fasta.RecordParser())
            fastaFileBaseName = os.path.splitext(os.path.basename(fastaFileName))[0]
            fastaFileBaseNames.append(fastaFileBaseName)

            if abortEvent():
                return jobID

            inputQueryNames[fastaFileBaseName] = {}

            for fastaRecord in fastaIterator:
                # Discard the header except for the first id word:
                fastaRecord.title = re.search(r'^(\S+)', fastaRecord.title).group(1)
                inputQueryNames[fastaFileBaseName][fastaRecord.title] = True
                print "%s -> %s: " % (fastaFileBaseName, fastaRecord.title)

                # If the sequence has been encountered before, skip it for now:
                if fastaRecord.sequence in uniqueDict:
                    copyLaterDict.setdefault(uniqueDict[fastaRecord.sequence], []).append(
                        '%s_%s' % (fastaFileBaseName, fastaRecord.title))
                    print '\tsequence double - skipping...\n'
                    continue
                else:
                    uniqueDict[fastaRecord.sequence] = '%s_%s' % (fastaFileBaseName, fastaRecord.title)

                # Find homologues: fasta files and pickled homologyResult objects
                # are written to the homolog cache.
                homologyResult = homolcompiler.compileHomologueSet(fastaRecord, fastaFileBaseName)

                if abortEvent():
                    return jobID

                if homologyResult is not None:
                    # The homologyResult object serves as a job carrying the relevant information.
                    aligner.align(os.path.join(optionParser.options.homologcache,
                                               homologyResult.homologuesFileName))
                    if abortEvent():
                        return jobID

                    try:
                        assignment.run(os.path.join(optionParser.options.alignmentcache,
                                                    homologyResult.alignmentFileName))
                    except assignmentPlugin.AssignmentError, X:
                        print X.msg

                    if abortEvent():
                        return jobID

                    treeStatistics = TreeStatistics(optionParser.options)
                    treeStatistics.runTreeStatistics(
                        [os.path.join(optionParser.options.homologcache,
                                      homologyResult.homologuesPickleFileName)],
                        generateSummary=False)

                    if abortEvent():
                        return jobID

            fastaFile.close()

            if abortEvent():
                return jobID

        # # Calculate the pairwise differences between sequences in each file:
        # if optionParser.options.diffs:
        #     pairwisediffs = PairWiseDiffs(optionParser.options)
        #     pairwisediffs.runPairWiseDiffs(inputFiles)
        #     #runPairWiseDiffs(inputFiles)
        #
        # if abortEvent():
        #     return jobID

        # Map each double to the sequence that was actually analyzed:
        doubleToAnalyzedDict = {}
        for k, l in copyLaterDict.items():
            doubleToAnalyzedDict.update(dict([[v, k] for v in l]))

        if not optionParser.options.nocopycache and len(doubleToAnalyzedDict):
            # Copy cache files for sequences that occurred more than once:
            print "Copying cached results for %d doubles" % len(doubleToAnalyzedDict)
            copyCacheForSequenceDoubles(copyLaterDict, optionParser.options)

        # Calculate the pairwise differences between sequences in each file:
        if optionParser.options.diffs:
            pairwisediffs = PairWiseDiffs(optionParser.options)
            pairwisediffs.runPairWiseDiffs(inputFiles)

        if abortEvent():
            return jobID

        # Summary tree stats:
        print 'Computing tree statistics summary...'
        treeStatistics = TreeStatistics(optionParser.options)
        treeStatistics.runTreeStatistics(inputFiles, generateSummary=True,
                                         doubleToAnalyzedDict=doubleToAnalyzedDict,
                                         inputQueryNames=inputQueryNames)
        print "done"

        if abortEvent():
            return jobID

        # Make HTML output:
        print '\tGenerating HTML output...'
        resultHTML = ResultHTML(optionParser.options)
        resultHTML.webify([optionParser.options.treestatscache + '/summary.pickle'],
                          fastaFileBaseNames, doubleToAnalyzedDict, sequenceNameMap)
        print 'done'

        return jobID
    except Exception:
        # The original handler is truncated in this excerpt; re-raising here is
        # an assumption, made only to keep the try block syntactically complete.
        raise
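# A minimal sketch of how a producer with this (jobID, abortEvent, ...)
# signature is typically started. Assumption: this follows the
# wx.lib.delayedresult worker pattern, which the signature matches; the
# consumer name and attributes below are illustrative, not taken from
# this file:
#
#     from wx.lib.delayedresult import startWorker, AbortEvent
#
#     self.abortEvent = AbortEvent()
#     startWorker(self._resultConsumer, self._resultProducer,
#                 wargs=(self.jobID, self.abortEvent, inputFiles))
#     ...
#     self.abortEvent.set()  # request a cooperative abort; the producer
#                            # polls abortEvent() between pipeline steps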
def run_analysis(self, input_file, options, stdout_file, stderr_file, email):

    class RedirectStdStreams(object):

        def __init__(self, stdout=None, stderr=None):
            if stdout is not None:
                stdout = open(stdout, 'w')
            if stderr is not None:
                stderr = open(stderr, 'w')
            self.stdout = stdout
            self.stderr = stderr
            self._stdout = stdout or sys.stdout
            self._stderr = stderr or sys.stderr

        def __enter__(self):
            self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
            self.old_stdout.flush()
            self.old_stderr.flush()
            sys.stdout, sys.stderr = self._stdout, self._stderr

        def __exit__(self, exc_type, exc_value, traceback):
            self._stdout.flush()
            self._stderr.flush()
            if sys.stdout is self.stdout:
                sys.stdout.close()
            if sys.stderr is self.stderr:
                sys.stderr.close()
            sys.stdout = self.old_stdout
            sys.stderr = self.old_stderr

    with RedirectStdStreams(stdout=stdout_file, stderr=stderr_file):

        # Make directories and write fixed input files:
        init = Initialize(options)
        init.createDirs()
        inputFiles, seqCount, sequenceNameMap = init.fixAndMoveInput([input_file])
        init.checkCacheConsistency(inputFiles)

        progress = 1
        self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})

        fastaFileBaseNames = []

        try:
            alignmentPlugin = findPlugin(options.alignment, 'SAP.alignment')
        except PluginNotFoundError:
            from SAP.Alignment import Clustalw2 as alignmentPlugin
            # exec("from SAP.Alignment import %s as alignmentPlugin" % options.alignment)
        aligner = alignmentPlugin.Aligner(options)

        try:
            assignmentPlugin = findPlugin(options.assignment, 'SAP.assignment')
        except PluginNotFoundError:
            if options.assignment == "Barcoder":
                from SAP.Assignment import Barcoder as assignmentPlugin
            elif options.assignment == "ConstrainedNJ":
                from SAP.Assignment import ConstrainedNJ as assignmentPlugin
            else:
                assert 0
            # exec("from SAP.Assignment import %s as assignmentPlugin" % options.assignment)
        assignment = assignmentPlugin.Assignment(options)

        uniqueDict = {}
        copyLaterDict = {}
        homolcompiler = HomolCompiler(options)
        inputQueryNames = {}

        # For each fasta file, execute the pipeline:
        for fastaFileName in inputFiles:
            fastaFile = open(fastaFileName, 'r')
            fastaIterator = Fasta.Iterator(fastaFile, parser=Fasta.RecordParser())
            fastaFileBaseName = os.path.splitext(os.path.basename(fastaFileName))[0]
            fastaFileBaseNames.append(fastaFileBaseName)
            inputQueryNames[fastaFileBaseName] = {}

            for fastaRecord in fastaIterator:
                # Discard the header except for the first id word:
                fastaRecord.title = re.search(r'^(\S+)', fastaRecord.title).group(1)
                app.logger.info("file: {}, query: {}".format(fastaFileBaseName, fastaRecord.title))
                inputQueryNames[fastaFileBaseName][fastaRecord.title] = True
                print "%s -> %s: " % (fastaFileBaseName, fastaRecord.title)

                # If the sequence has been encountered before, skip it for now:
                if fastaRecord.sequence in uniqueDict:
                    copyLaterDict.setdefault(uniqueDict[fastaRecord.sequence], []).append(
                        '%s_%s' % (fastaFileBaseName, fastaRecord.title))
                    print '\tsequence double - skipping...\n'
                    continue
                else:
                    uniqueDict[fastaRecord.sequence] = '%s_%s' % (fastaFileBaseName, fastaRecord.title)

                # Find homologues: fasta files and pickled homologyResult objects
                # are written to the homolog cache.
                homologyResult = homolcompiler.compileHomologueSet(fastaRecord, fastaFileBaseName)

                progress += 1
                self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})

                if homologyResult is not None:
                    # The homologyResult object serves as a job carrying the relevant information.
                    aligner.align(os.path.join(options.homologcache, homologyResult.homologuesFileName))

                    progress += 1
                    self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})

                    try:
                        assignment.run(os.path.join(options.alignmentcache, homologyResult.alignmentFileName))
                    except assignmentPlugin.AssignmentError, X:
                        print X.msg

                    progress += 1
                    self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})

                    treeStatistics = TreeStatistics(options)
                    treeStatistics.runTreeStatistics(
                        [os.path.join(options.homologcache, homologyResult.homologuesPickleFileName)],
                        generateSummary=False)

                    progress += 1
                    self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})
                else:
                    progress += 3
                    self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})

            fastaFile.close()

        # Map each double to the sequence that was actually analyzed:
        doubleToAnalyzedDict = {}
        for k, l in copyLaterDict.items():
            doubleToAnalyzedDict.update(dict([[v, k] for v in l]))

        if not options.nocopycache and len(doubleToAnalyzedDict):
            # Copy cache files for sequences that occurred more than once:
            print "Copying cached results for %d doubles" % len(doubleToAnalyzedDict)
            copyCacheForSequenceDoubles(copyLaterDict, options)

        # Calculate the pairwise differences between sequences in each file:
        if options.diffs:
            pairwisediffs = PairWiseDiffs(options)
            pairwisediffs.runPairWiseDiffs(inputFiles)

        # Summary tree stats:
        print 'Computing tree statistics summary...'
        treeStatistics = TreeStatistics(options)
        treeStatistics.runTreeStatistics(inputFiles, generateSummary=True,
                                         doubleToAnalyzedDict=doubleToAnalyzedDict,
                                         inputQueryNames=inputQueryNames)
        print "done"

        progress += 1
        self.update_state(state='PROGRESS', meta={'current': progress, 'total': seqCount * 4 + 2})

        # Make HTML output:
        print '\tGenerating HTML output...'
        resultHTML = ResultHTML(options)
        resultHTML.webify([options.treestatscache + '/summary.pickle'],
                          fastaFileBaseNames, doubleToAnalyzedDict, sequenceNameMap)
        print 'done'

        # Clean up files we won't need anyway:
        shutil.rmtree(options.datadir)
        shutil.rmtree(options.homologcache)
        shutil.rmtree(options.blastcache)
        shutil.rmtree(options.dbcache)
        shutil.rmtree(options.treescache)
        shutil.rmtree(options.alignmentcache)
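# A minimal sketch of the task wiring this function appears written for:
# the self.update_state(...) calls match Celery's bound-task API, so
# registration and invocation would look roughly like the following (the
# decorator and calls below are assumptions, not taken from this file):
#
#     @app.task(bind=True)
#     def run_analysis(self, input_file, options, stdout_file, stderr_file, email):
#         ...
#
#     result = run_analysis.delay(input_file, options, stdout_file,
#                                 stderr_file, email)
#     result.state  # 'PROGRESS' while running
#     result.info   # e.g. {'current': 3, 'total': seqCount * 4 + 2}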