Example #1
File: GUI.py Project: kaspermunch/sap
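This method is the GUI's background worker (the snippet is cut off before the closing clause of the outer try). For each input FASTA file it runs the full pipeline per query sequence: compile a set of homologues, align them, run the assignment plugin, and compute per-query tree statistics, polling abortEvent() between steps so the user can cancel the job. Duplicate sequences are skipped during the loop and their cached results are copied afterwards; the run finishes with optional pairwise diffs, a tree-statistics summary, and HTML output.
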
    def _resultProducer(self, jobID, abortEvent, inputFiles):

        try:

            sys.stdout = OutputEnqueue()

            global optionParser

            # Make directories and write fixed input files:
            init = Initialize(optionParser.options)
            init.createDirs()

            inputFiles, seqCount, sequenceNameMap = init.fixAndMoveInput(
                inputFiles)
            init.checkCacheConsistency(inputFiles)

            fastaFileBaseNames = []

            try:
                alignmentPlugin = findPlugin(optionParser.options.alignment,
                                             'SAP.alignment')
            except PluginNotFoundError:
                exec("from SAP.Alignment import %s as alignmentPlugin" %
                     optionParser.options.alignment)
            aligner = alignmentPlugin.Aligner(optionParser.options)

            try:
                assignmentPlugin = findPlugin(optionParser.options.assignment,
                                              'SAP.assignment')
            except PluginNotFoundError:
                exec("from SAP.Assignment import %s as assignmentPlugin" %
                     optionParser.options.assignment)
            assignment = assignmentPlugin.Assignment(optionParser.options)

            uniqueDict = {}
            copyLaterDict = {}

            homolcompiler = HomolCompiler(optionParser.options)

            inputQueryNames = {}

            # For each fasta file, execute the pipeline:
            for fastaFileName in inputFiles:

                fastaFile = open(fastaFileName, 'r')
                fastaIterator = Fasta.Iterator(fastaFile,
                                               parser=Fasta.RecordParser())
                fastaFileBaseName = os.path.splitext(
                    os.path.basename(fastaFileName))[0]
                fastaFileBaseNames.append(fastaFileBaseName)

                if abortEvent():
                    return jobID

                inputQueryNames[fastaFileBaseName] = {}

                for fastaRecord in fastaIterator:

                    # Discard the header except for the first id word:
                    fastaRecord.title = re.search(r'^(\S+)',
                                                  fastaRecord.title).group(1)

                    inputQueryNames[fastaFileBaseName][
                        fastaRecord.title] = True

                    print "%s -> %s: " % (fastaFileBaseName, fastaRecord.title)

                    # See if the sequence has been encountered before and if so skip it for now:
                    if fastaRecord.sequence in uniqueDict:
                        copyLaterDict.setdefault(
                            uniqueDict[fastaRecord.sequence],
                            []).append('%s_%s' %
                                       (fastaFileBaseName, fastaRecord.title))
                        print '\tsequence double - skipping...\n'
                        continue
                    else:
                        uniqueDict[fastaRecord.sequence] = '%s_%s' % (
                            fastaFileBaseName, fastaRecord.title)

                    # Find homologues: Fasta files and pickled homologyResult objects are written to homologcache
                    homologyResult = homolcompiler.compileHomologueSet(
                        fastaRecord, fastaFileBaseName)

                    if abortEvent():
                        return jobID

                    if homologyResult is not None:
                        # The homologyResult object serves as a job carrying the relevant information.

                        aligner.align(
                            os.path.join(optionParser.options.homologcache,
                                         homologyResult.homologuesFileName))

                        if abortEvent():
                            return jobID

                        try:
                            assignment.run(
                                os.path.join(
                                    optionParser.options.alignmentcache,
                                    homologyResult.alignmentFileName))
                        except assignmentPlugin.AssignmentError, X:
                            print X.msg

                        if abortEvent():
                            return jobID

                        treeStatistics = TreeStatistics(optionParser.options)
                        treeStatistics.runTreeStatistics(
                            [os.path.join(optionParser.options.homologcache,
                                          homologyResult.homologuesPickleFileName)],
                            generateSummary=False)

                        if abortEvent():
                            return jobID

                fastaFile.close()

            if abortEvent():
                return jobID


#             # Calculate the pairwise differences between sequences in each file:
#             if optionParser.options.diffs:
#                 pairwisediffs = PairWiseDiffs(optionParser.options)
#                 pairwisediffs.runPairWiseDiffs(inputFiles)
#                 #runPairWiseDiffs(inputFiles)
#
#             if abortEvent():
#                 return jobID

            # Make dictionary to map doubles to the ones analyzed:
            doubleToAnalyzedDict = {}
            for k, l in copyLaterDict.items():
                doubleToAnalyzedDict.update(dict([[v, k] for v in l]))

            if not optionParser.options.nocopycache and len(
                    doubleToAnalyzedDict):
                # Copy cache files for sequences that occurred more than once:
                print "Copying cached results for %d doubles" % len(
                    doubleToAnalyzedDict)
                copyCacheForSequenceDoubles(copyLaterDict,
                                            optionParser.options)

            # Calculate the pairwise differences between sequences in each file:
            if optionParser.options.diffs:
                pairwisediffs = PairWiseDiffs(optionParser.options)
                pairwisediffs.runPairWiseDiffs(inputFiles)

            if abortEvent():
                return jobID

            # Summary tree stats:
            print 'Computing tree statistics summary...'
            treeStatistics = TreeStatistics(optionParser.options)
            treeStatistics.runTreeStatistics(
                inputFiles,
                generateSummary=True,
                doubleToAnalyzedDict=doubleToAnalyzedDict,
                inputQueryNames=inputQueryNames)
            print "done"

            if abortEvent():
                return jobID

            # Make HTML output:
            print '\tGenerating HTML output...'

            resultHTML = ResultHTML(optionParser.options)
            resultHTML.webify(
                [optionParser.options.treestatscache + '/summary.pickle'],
                fastaFileBaseNames, doubleToAnalyzedDict, sequenceNameMap)
            print 'done'

            return jobID
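
The duplicate handling above is spread across three dictionaries and two passes, which is easy to miss inline. Below is a minimal, self-contained sketch of just that pattern, with invented toy records (not SAP data): the first record carrying a given sequence is analyzed, and later identical records are mapped back to it.

# Sketch of the duplicate-sequence bookkeeping (toy data, not SAP's API):
uniqueDict = {}     # sequence -> name of the record that gets analyzed
copyLaterDict = {}  # analyzed name -> names of later identical records

records = [('file1', 'query1', 'ACGT'),
           ('file1', 'query2', 'ACGT'),   # duplicate sequence
           ('file2', 'query3', 'TTGC')]
for fileName, title, sequence in records:
    name = '%s_%s' % (fileName, title)
    if sequence in uniqueDict:
        # Identical sequence seen before: skip it now, copy results later.
        copyLaterDict.setdefault(uniqueDict[sequence], []).append(name)
    else:
        uniqueDict[sequence] = name

# Invert the mapping so each skipped double points at the analyzed record:
doubleToAnalyzedDict = {}
for analyzed, doubles in copyLaterDict.items():
    for double in doubles:
        doubleToAnalyzedDict[double] = analyzed

print(doubleToAnalyzedDict)  # {'file1_query2': 'file1_query1'}
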
Example #2
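This snippet (truncated at both ends) is from the command-line driver: depending on which option flag is set it aligns each input file, runs an assignment plugin on already-aligned files, or computes tree statistics, and otherwise falls through to a default branch that first checks that the netblast and clustalw2 executables are installed.
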
            for fastaFileName in args:
                aligner.align(fastaFileName)
        elif options._sample:
            try:
                plugin = findPlugin(options.assignment, 'sap.assignment')
            except PluginNotFoundError, X:
                raise AnalysisTerminated(1, "The plugin or file %s was not found." % X.plugin)
            assignment = plugin.Assignment(options)
            for alignmentFileName in args:
                try:
                    assignment.run(alignmentFileName)
                except plugin.AssignmentError, X:
                    print X.msg

        elif options._stats:
            treeStatistics = TreeStatistics(options)
            treeStatistics.runTreeStatistics(args, generateSummary=False)

#             #######################################
#             if options.ghostpopulation:
#                 ima = IMa.Assignment(options)
#                 ima.run(args)
#             #######################################

        else:

            # Check that netblast and clustalw2 are installed:
            from UtilityFunctions import findOnSystem
            missing = False
            if os.name in ('nt', 'dos'):
                name = 'blastn.exe'
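
Both this snippet and the examples on this page resolve plugins through findPlugin, whose implementation is not shown here. The sketch below is one plausible implementation using setuptools entry points, assuming only the behavior visible in the examples (it raises PluginNotFoundError, and the error object carries the plugin name); it is not the project's actual code.

# One plausible findPlugin implementation (illustrative only, not SAP's code),
# resolving a named plugin through setuptools entry points:
import pkg_resources

class PluginNotFoundError(Exception):
    def __init__(self, plugin):
        Exception.__init__(self)
        self.plugin = plugin  # name reported in the caller's error message

def findPlugin(name, group):
    # e.g. group='sap.assignment', name=options.assignment
    for entryPoint in pkg_resources.iter_entry_points(group=group, name=name):
        return entryPoint.load()  # the loaded plugin module/object
    raise PluginNotFoundError(name)
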
Example #3
File: app.py Project: kaspermunch/sap
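This is the web application's variant of the same pipeline, evidently run as a Celery-style bound task: stdout and stderr are captured to files with the RedirectStdStreams context manager, and progress is reported through self.update_state(). The total of seqCount * 4 + 2 counts four ticks per query sequence (homologue search, alignment, assignment, tree statistics) plus setup and the final summary; queries without a homology result advance the counter by three at once. Unlike the GUI version, all cache directories are removed when the run completes.
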
def run_analysis(self, input_file, options, stdout_file, stderr_file, email):
    class RedirectStdStreams(object):
        def __init__(self, stdout=None, stderr=None):
            if stdout is not None:
                stdout = open(stdout, 'w')
            if stderr is not None:
                stderr = open(stderr, 'w')
            self.stdout = stdout
            self.stderr = stderr
            self._stdout = stdout or sys.stdout
            self._stderr = stderr or sys.stderr

        def __enter__(self):
            self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
            self.old_stdout.flush()
            self.old_stderr.flush()
            sys.stdout, sys.stderr = self._stdout, self._stderr

        def __exit__(self, exc_type, exc_value, traceback):
            self._stdout.flush()
            self._stderr.flush()
            if sys.stdout is self.stdout:
                sys.stdout.close()
            if sys.stderr is self.stderr:
                sys.stderr.close()
            sys.stdout = self.old_stdout
            sys.stderr = self.old_stderr

    with RedirectStdStreams(stdout=stdout_file, stderr=stderr_file):

        # Make directories and write fixed input files:
        init = Initialize(options)
        init.createDirs()

        inputFiles, seqCount, sequenceNameMap = init.fixAndMoveInput(
            [input_file])
        init.checkCacheConsistency(inputFiles)

        progress = 1
        self.update_state(state='PROGRESS',
                          meta={
                              'current': progress,
                              'total': seqCount * 4 + 2
                          })

        fastaFileBaseNames = []

        try:
            alignmentPlugin = findPlugin(options.alignment, 'SAP.alignment')
        except PluginNotFoundError:
            from SAP.Alignment import Clustalw2 as alignmentPlugin
            # exec("from SAP.Alignment import %s as alignmentPlugin" % options.alignment)
        aligner = alignmentPlugin.Aligner(options)

        try:
            assignmentPlugin = findPlugin(options.assignment, 'SAP.assignment')
        except PluginNotFoundError:
            if options.assignment == "Barcoder":
                from SAP.Assignment import Barcoder as assignmentPlugin
            elif options.assignment == "ConstrainedNJ":
                from SAP.Assignment import ConstrainedNJ as assignmentPlugin
            else:
                assert 0
        # exec("from SAP.Assignment import %s as assignmentPlugin" % options.assignment)
        assignment = assignmentPlugin.Assignment(options)

        uniqueDict = {}
        copyLaterDict = {}

        homolcompiler = HomolCompiler(options)

        inputQueryNames = {}

        # For each fasta file, execute the pipeline:
        for fastaFileName in inputFiles:

            fastaFile = open(fastaFileName, 'r')
            fastaIterator = Fasta.Iterator(fastaFile,
                                           parser=Fasta.RecordParser())
            fastaFileBaseName = os.path.splitext(
                os.path.basename(fastaFileName))[0]
            fastaFileBaseNames.append(fastaFileBaseName)

            inputQueryNames[fastaFileBaseName] = {}

            for fastaRecord in fastaIterator:

                # Discard the header except for the first id word:
                fastaRecord.title = re.search(r'^(\S+)',
                                              fastaRecord.title).group(1)

                app.logger.info("file: {}, query: {}".format(
                    fastaFileBaseName, fastaRecord.title))

                inputQueryNames[fastaFileBaseName][fastaRecord.title] = True

                print "%s -> %s: " % (fastaFileBaseName, fastaRecord.title)

                # See if the sequence has been encountered before and if so skip it for now:
                if fastaRecord.sequence in uniqueDict:
                    copyLaterDict.setdefault(
                        uniqueDict[fastaRecord.sequence], []).append(
                            '%s_%s' % (fastaFileBaseName, fastaRecord.title))
                    print '\tsequence double - skipping...\n'
                    continue
                else:
                    uniqueDict[fastaRecord.sequence] = '%s_%s' % (
                        fastaFileBaseName, fastaRecord.title)

                # Find homologues: Fasta files and pickled homologyResult objects are written to homologcache
                homologyResult = homolcompiler.compileHomologueSet(
                    fastaRecord, fastaFileBaseName)

                progress += 1
                self.update_state(state='PROGRESS',
                                  meta={
                                      'current': progress,
                                      'total': seqCount * 4 + 2
                                  })

                if homologyResult is not None:
                    # The homologyResult object serves as a job carrying the relevant information.

                    aligner.align(
                        os.path.join(options.homologcache,
                                     homologyResult.homologuesFileName))

                    progress += 1
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })

                    try:
                        assignment.run(
                            os.path.join(options.alignmentcache,
                                         homologyResult.alignmentFileName))
                    except assignmentPlugin.AssignmentError, X:
                        print X.msg

                    progress += 1
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })

                    treeStatistics = TreeStatistics(options)
                    treeStatistics.runTreeStatistics(
                        [os.path.join(options.homologcache,
                                      homologyResult.homologuesPickleFileName)],
                        generateSummary=False)

                    progress += 1
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })
                else:
                    progress += 3
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })

            fastaFile.close()

        # Make dictionary to map doubles to the ones analyzed:
        doubleToAnalyzedDict = {}
        for k, l in copyLaterDict.items():
            doubleToAnalyzedDict.update(dict([[v, k] for v in l]))

        if not options.nocopycache and len(doubleToAnalyzedDict):
            # Copy cache files for sequences that occurred more than once:
            print "Copying cached results for %d doubles" % len(
                doubleToAnalyzedDict)
            copyCacheForSequenceDoubles(copyLaterDict, options)

        # Calculate the pairwise differences between sequences in each file:
        if options.diffs:
            pairwisediffs = PairWiseDiffs(options)
            pairwisediffs.runPairWiseDiffs(inputFiles)

        # Summary tree stats:
        print 'Computing tree statistics summary...'
        treeStatistics = TreeStatistics(options)
        treeStatistics.runTreeStatistics(
            inputFiles,
            generateSummary=True,
            doubleToAnalyzedDict=doubleToAnalyzedDict,
            inputQueryNames=inputQueryNames)
        print "done"

        progress += 1
        self.update_state(state='PROGRESS',
                          meta={
                              'current': progress,
                              'total': seqCount * 4 + 2
                          })

        # Make HTML output:
        print '\tGenerating HTML output...'

        resultHTML = ResultHTML(options)
        resultHTML.webify([options.treestatscache + '/summary.pickle'],
                          fastaFileBaseNames, doubleToAnalyzedDict,
                          sequenceNameMap)
        print 'done'

        # clean up files we won't need anyway
        shutil.rmtree(options.datadir)
        shutil.rmtree(options.homologcache)
        shutil.rmtree(options.blastcache)
        shutil.rmtree(options.dbcache)
        shutil.rmtree(options.treescache)
        shutil.rmtree(options.alignmentcache)
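
The RedirectStdStreams helper in this example is self-contained and, lifted to module level, reusable on its own. A minimal usage sketch, with illustrative file names:

# Minimal usage sketch for RedirectStdStreams (file names are illustrative):
with RedirectStdStreams(stdout='run.log', stderr='run.err'):
    print('captured in run.log')  # stdout is redirected inside the block

On exit the redirected files are flushed and closed and the original streams are restored, so the pattern is safe to wrap around any block that prints.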