Example #1
                print "\tPool closed"

            # Make a dictionary mapping doubles to the ones analyzed:
            doubleToAnalyzedDict = {}
            for k, l in copyLaterDict.items():
                doubleToAnalyzedDict.update(dict((v, k) for v in l))

            if not options.nocopycache and len(doubleToAnalyzedDict):
                # Copy cache files for sequences that occurred more than once:
                print "Copying cached results for %d doubles" % len(
                    doubleToAnalyzedDict)
                copyCacheForSequenceDoubles(copyLaterDict, options)

            # Calculate the pairwise differences between sequences in each file:
            if options.diffs:
                pairwisediffs = PairWiseDiffs(options)
                pairwisediffs.runPairWiseDiffs(args)
                #runPairWiseDiffs(args)

            # Summary tree stats:
            print 'Computing tree statistics summary...'
            treeStatistics = TreeStatistics(options)
            treeStatistics.runTreeStatistics(
                args,
                generateSummary=True,
                doubleToAnalyzedDict=doubleToAnalyzedDict,
                inputQueryNames=inputQueryNames)
            print "done"

            # Make HTML output:
            print '\tGenerating HTML output...'
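
The inversion step above turns copyLaterDict (analyzed name -> list of duplicate names) into doubleToAnalyzedDict (duplicate name -> analyzed name). A minimal self-contained sketch of that idiom, using hypothetical query names:

    copyLaterDict = {'fileA_seq1': ['fileB_seq7', 'fileC_seq2']}
    doubleToAnalyzedDict = {}
    for analyzed, doubles in copyLaterDict.items():
        doubleToAnalyzedDict.update(dict((d, analyzed) for d in doubles))
    assert doubleToAnalyzedDict == {'fileB_seq7': 'fileA_seq1',
                                    'fileC_seq2': 'fileA_seq1'}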
Example #2
    def _resultProducer(self, jobID, abortEvent, inputFiles):

        try:

            sys.stdout = OutputEnqueue()

            global optionParser

            # Make directories and write fixed input files:
            init = Initialize(optionParser.options)
            init.createDirs()

            inputFiles, seqCount, sequenceNameMap = init.fixAndMoveInput(
                inputFiles)
            init.checkCacheConsistency(inputFiles)

            fastaFileBaseNames = []

            try:
                alignmentPlugin = findPlugin(optionParser.options.alignment,
                                             'SAP.alignment')
            except PluginNotFoundError:
                exec("from SAP.Alignment import %s as alignmentPlugin" %
                     optionParser.options.alignment)
            aligner = alignmentPlugin.Aligner(optionParser.options)

            try:
                assignmentPlugin = findPlugin(optionParser.options.assignment,
                                              'SAP.assignment')
            except PluginNotFoundError:
                exec("from SAP.Assignment import %s as assignmentPlugin" %
                     optionParser.options.assignment)
            assignment = assignmentPlugin.Assignment(optionParser.options)

            uniqueDict = {}
            copyLaterDict = {}
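            # uniqueDict maps each sequence string to the first query name seen
            # with it; copyLaterDict maps that name to any later duplicates so
            # their cached results can be copied after the main loop.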

            homolcompiler = HomolCompiler(optionParser.options)

            inputQueryNames = {}

            # For each fasta file, execute the pipeline:
            for fastaFileName in inputFiles:

                fastaFile = open(fastaFileName, 'r')
                fastaIterator = Fasta.Iterator(fastaFile,
                                               parser=Fasta.RecordParser())
                fastaFileBaseName = os.path.splitext(
                    os.path.basename(fastaFileName))[0]
                fastaFileBaseNames.append(fastaFileBaseName)

                if abortEvent():
                    return jobID

                inputQueryNames[fastaFileBaseName] = {}

                for fastaRecord in fastaIterator:

                    # Discard the header except for the first id word:
                    fastaRecord.title = re.search(r'^(\S+)',
                                                  fastaRecord.title).group(1)

                    inputQueryNames[fastaFileBaseName][
                        fastaRecord.title] = True

                    print "%s -> %s: " % (fastaFileBaseName, fastaRecord.title)

                    # See if the sequence has been encountered before, and if so skip it for now:
                    if fastaRecord.sequence in uniqueDict:
                        copyLaterDict.setdefault(
                            uniqueDict[fastaRecord.sequence],
                            []).append('%s_%s' %
                                       (fastaFileBaseName, fastaRecord.title))
                        print '\tsequence double - skipping...\n'
                        continue
                    else:
                        uniqueDict[fastaRecord.sequence] = '%s_%s' % (
                            fastaFileBaseName, fastaRecord.title)

                    # Find homologues: Fasta files and pickled homologyResult objects are written to homologcache
                    homologyResult = homolcompiler.compileHomologueSet(
                        fastaRecord, fastaFileBaseName)

                    if abortEvent():
                        return jobID

                    if homologyResult is not None:
                        # The homologyResult object serves as a job carrying the relevant information.

                        aligner.align(
                            os.path.join(optionParser.options.homologcache,
                                         homologyResult.homologuesFileName))

                        if abortEvent():
                            return jobID

                        try:
                            assignment.run(
                                os.path.join(
                                    optionParser.options.alignmentcache,
                                    homologyResult.alignmentFileName))
                        except assignmentPlugin.AssignmentError, X:
                            print X.msg

                        if abortEvent():
                            return jobID

                        treeStatistics = TreeStatistics(optionParser.options)
                        treeStatistics.runTreeStatistics([
                            os.path.join(
                                optionParser.options.homologcache,
                                homologyResult.homologuesPickleFileName)
                        ],
                                                         generateSummary=False)

                        if abortEvent():
                            return jobID

                fastaFile.close()

            if abortEvent():
                return jobID


            # Make a dictionary mapping doubles to the ones analyzed:
            doubleToAnalyzedDict = {}
            for k, l in copyLaterDict.items():
                doubleToAnalyzedDict.update(dict((v, k) for v in l))

            if not optionParser.options.nocopycache and len(
                    doubleToAnalyzedDict):
                # Copy cache files for sequences that occurred more than once:
                print "Copying cached results for %d doubles" % len(
                    doubleToAnalyzedDict)
                copyCacheForSequenceDoubles(copyLaterDict,
                                            optionParser.options)

            # Calculate the pairwise differences between sequences in each file:
            if optionParser.options.diffs:
                pairwisediffs = PairWiseDiffs(optionParser.options)
                pairwisediffs.runPairWiseDiffs(inputFiles)

            if abortEvent():
                return jobID

            # Summary tree stats:
            print 'Computing tree statistics summary...'
            treeStatistics = TreeStatistics(optionParser.options)
            treeStatistics.runTreeStatistics(
                inputFiles,
                generateSummary=True,
                doubleToAnalyzedDict=doubleToAnalyzedDict,
                inputQueryNames=inputQueryNames)
            print "done"

            if abortEvent():
                return jobID

            # Make HTML output:
            print '\tGenerating HTML output...'

            resultHTML = ResultHTML(optionParser.options)
            resultHTML.webify(
                [optionParser.options.treestatscache + '/summary.pickle'],
                fastaFileBaseNames, doubleToAnalyzedDict, sequenceNameMap)
            print 'done'

            return jobID
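
When findPlugin cannot locate an installed plugin, the code above falls back to exec() with a string-built import. A sketch of the same fallback written with importlib instead, assuming the findPlugin/PluginNotFoundError API used above:

    import importlib

    def loadAlignmentPlugin(name):
        # Prefer the plugin registry; otherwise import the bundled
        # SAP.Alignment module of the same name without exec().
        try:
            return findPlugin(name, 'SAP.alignment')
        except PluginNotFoundError:
            return importlib.import_module('SAP.Alignment.%s' % name)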
Example #3
                poolStatus(pool)
                print "\tPool closed"

            # Make a dictionary mapping doubles to the ones analyzed:
            doubleToAnalyzedDict = {}
            for k, l in copyLaterDict.items():
                doubleToAnalyzedDict.update(dict((v, k) for v in l))

            if not options.nocopycache and len(doubleToAnalyzedDict):
                # Copy cache files for sequences that occurred more than once:
                print "Copying cached results for %d doubles" % len(doubleToAnalyzedDict)
                copyCacheForSequenceDoubles(copyLaterDict, options)

            # Calculate the pairwise differences between sequences in each file:
            if options.diffs:
                pairwisediffs = PairWiseDiffs(options)
                pairwisediffs.runPairWiseDiffs(args)
                #runPairWiseDiffs(args)

            # Summary tree stats:
            print 'Computing tree statistics summary...'
            treeStatistics = TreeStatistics(options)
            treeStatistics.runTreeStatistics(args, generateSummary=True, doubleToAnalyzedDict=doubleToAnalyzedDict, inputQueryNames=inputQueryNames)
            print "done"

            # Make HTML output:
            print '\tGenerating HTML output...'

            resultHTML = ResultHTML(options)
            resultHTML.webify([options.treestatscache + '/summary.pickle'], fastaFileBaseNames, doubleToAnalyzedDict, sequenceNameMap)
            print 'done'
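
Note that the summary path is built by string concatenation on options.treestatscache; os.path.join is the portable equivalent. A tiny check, with a hypothetical cache directory:

    import os
    treestatscache = '/tmp/sap/treestatscache'  # hypothetical value
    assert os.path.join(treestatscache, 'summary.pickle') == \
        treestatscache + '/summary.pickle'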
Example #4
def run_analysis(self, input_file, options, stdout_file, stderr_file, email):
    class RedirectStdStreams(object):
        def __init__(self, stdout=None, stderr=None):
            if stdout is not None:
                stdout = open(stdout, 'w')
            if stderr is not None:
                stderr = open(stderr, 'w')
            self.stdout = stdout
            self.stderr = stderr
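            # Fall back to the process's current streams when no path is given: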
            self._stdout = stdout or sys.stdout
            self._stderr = stderr or sys.stderr

        def __enter__(self):
            self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
            self.old_stdout.flush()
            self.old_stderr.flush()
            sys.stdout, sys.stderr = self._stdout, self._stderr

        def __exit__(self, exc_type, exc_value, traceback):
            self._stdout.flush()
            self._stderr.flush()
            if sys.stdout is self.stdout:
                sys.stdout.close()
            if sys.stderr is self.stderr:
                sys.stderr.close()
            sys.stdout = self.old_stdout
            sys.stderr = self.old_stderr

    with RedirectStdStreams(stdout=stdout_file, stderr=stderr_file):

        # Make directories and write fixed input files:
        init = Initialize(options)
        init.createDirs()

        inputFiles, seqCount, sequenceNameMap = init.fixAndMoveInput(
            [input_file])
        init.checkCacheConsistency(inputFiles)

        progress = 1
        self.update_state(state='PROGRESS',
                          meta={
                              'current': progress,
                              'total': seqCount * 4 + 2
                          })

        fastaFileBaseNames = []

        try:
            alignmentPlugin = findPlugin(options.alignment, 'SAP.alignment')
        except PluginNotFoundError:
            from SAP.Alignment import Clustalw2 as alignmentPlugin
            # exec("from SAP.Alignment import %s as alignmentPlugin" % options.alignment)
        aligner = alignmentPlugin.Aligner(options)

        try:
            assignmentPlugin = findPlugin(options.assignment, 'SAP.assignment')
        except PluginNotFoundError:
            if options.assignment == "Barcoder":
                from SAP.Assignment import Barcoder as assignmentPlugin
            elif options.assignment == "ConstrainedNJ":
                from SAP.Assignment import ConstrainedNJ as assignmentPlugin
            else:
                # exec("from SAP.Assignment import %s as assignmentPlugin" % options.assignment)
                assert 0, "unknown assignment method: %s" % options.assignment
        assignment = assignmentPlugin.Assignment(options)

        uniqueDict = {}
        copyLaterDict = {}

        homolcompiler = HomolCompiler(options)

        inputQueryNames = {}

        # For each fasta file, execute the pipeline:
        for fastaFileName in inputFiles:

            fastaFile = open(fastaFileName, 'r')
            fastaIterator = Fasta.Iterator(fastaFile,
                                           parser=Fasta.RecordParser())
            fastaFileBaseName = os.path.splitext(
                os.path.basename(fastaFileName))[0]
            fastaFileBaseNames.append(fastaFileBaseName)

            inputQueryNames[fastaFileBaseName] = {}

            for fastaRecord in fastaIterator:

                # Discard the header except for the first id word:
                fastaRecord.title = re.search(r'^(\S+)',
                                              fastaRecord.title).group(1)

                app.logger.info("file: {}, query: {}".format(
                    fastaFileBaseName, fastaRecord.title))

                inputQueryNames[fastaFileBaseName][fastaRecord.title] = True

                print "%s -> %s: " % (fastaFileBaseName, fastaRecord.title)

                # See if the sequence has been encountered before, and if so skip it for now:
                if fastaRecord.sequence in uniqueDict:
                    copyLaterDict.setdefault(
                        uniqueDict[fastaRecord.sequence], []).append(
                            '%s_%s' % (fastaFileBaseName, fastaRecord.title))
                    print '\tsequence double - skipping...\n'
                    continue
                else:
                    uniqueDict[fastaRecord.sequence] = '%s_%s' % (
                        fastaFileBaseName, fastaRecord.title)

                # Find homologues: Fasta files and pickled homologyResult objects are written to homologcache
                homologyResult = homolcompiler.compileHomologueSet(
                    fastaRecord, fastaFileBaseName)

                progress += 1
                self.update_state(state='PROGRESS',
                                  meta={
                                      'current': progress,
                                      'total': seqCount * 4 + 2
                                  })

                if homologyResult is not None:
                    # The homologyResult object serves as a job carrying the relevant information.

                    aligner.align(
                        os.path.join(options.homologcache,
                                     homologyResult.homologuesFileName))

                    progress += 1
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })

                    try:
                        assignment.run(
                            os.path.join(options.alignmentcache,
                                         homologyResult.alignmentFileName))
                    except assignmentPlugin.AssignmentError, X:
                        print X.msg

                    progress += 1
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })

                    treeStatistics = TreeStatistics(options)
                    treeStatistics.runTreeStatistics([
                        os.path.join(options.homologcache,
                                     homologyResult.homologuesPickleFileName)
                    ],
                                                     generateSummary=False)

                    progress += 1
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })
                else:
                    progress += 3
                    self.update_state(state='PROGRESS',
                                      meta={
                                          'current': progress,
                                          'total': seqCount * 4 + 2
                                      })

            fastaFile.close()

        # Make a dictionary mapping doubles to the ones analyzed:
        doubleToAnalyzedDict = {}
        for k, l in copyLaterDict.items():
            doubleToAnalyzedDict.update(dict((v, k) for v in l))

        if not options.nocopycache and len(doubleToAnalyzedDict):
            # Copy cache files for sequences that occurred more than once:
            print "Copying cached results for %d doubles" % len(
                doubleToAnalyzedDict)
            copyCacheForSequenceDoubles(copyLaterDict, options)

        # Calculate the pairwise differences between sequences in each file:
        if options.diffs:
            pairwisediffs = PairWiseDiffs(options)
            pairwisediffs.runPairWiseDiffs(inputFiles)

        # Summary tree stats:
        print 'Computing tree statistics summary...'
        treeStatistics = TreeStatistics(options)
        treeStatistics.runTreeStatistics(
            inputFiles,
            generateSummary=True,
            doubleToAnalyzedDict=doubleToAnalyzedDict,
            inputQueryNames=inputQueryNames)
        print "done"

        progress += 1
        self.update_state(state='PROGRESS',
                          meta={
                              'current': progress,
                              'total': seqCount * 4 + 2
                          })

        # Make HTML output:
        print '\tGenerating HTML output...'

        resultHTML = ResultHTML(options)
        resultHTML.webify([options.treestatscache + '/summary.pickle'],
                          fastaFileBaseNames, doubleToAnalyzedDict,
                          sequenceNameMap)
        print 'done'

        # Clean up intermediate files that are no longer needed:
        shutil.rmtree(options.datadir)
        shutil.rmtree(options.homologcache)
        shutil.rmtree(options.blastcache)
        shutil.rmtree(options.dbcache)
        shutil.rmtree(options.treescache)
        shutil.rmtree(options.alignmentcache)
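
The progress total of seqCount * 4 + 2 follows from the accounting in the loop above: one tick after input preparation, four ticks per query (homologue compilation, alignment, assignment, and per-query tree statistics, or a single +3 when no homologues are found), and one final tick after the summary. A quick check of that arithmetic:

    def expectedTotal(seqCount):
        # 1 tick after fixAndMoveInput + 4 per sequence + 1 after the summary.
        return 1 + 4 * seqCount + 1

    assert expectedTotal(10) == 10 * 4 + 2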