Пример #1
0
    def __init__(self, fastaFileName, options, rebuild=False):
        """
        Opens a premade local data base and builds an index of taxon names.
        """

        self.options = options
        self.fastaFileName = fastaFileName

        baseName, ext = os.path.splitext(os.path.basename(self.fastaFileName))
        self.baseName = baseName
        self.dbDirName = os.path.splitext(self.fastaFileName)[0] + '.sapdb'
        self.blastSequenceFileName = os.path.join(self.dbDirName,
                                                  baseName + '.fasta')
        self.blastDB = os.path.join(self.dbDirName, baseName)
        #self.dbFileName = os.path.join(self.dbDirName, baseName + '.db')
        self.indexFileName = os.path.join(self.dbDirName, baseName + '.taxidx')
        self.summaryTreeFileName = os.path.join(self.dbDirName,
                                                baseName + '.html')

        # Build shelve database if not present:
        if rebuild or not os.path.exists(self.dbDirName) or len(
                glob.glob(os.path.join("%s/*" % self.dbDirName))) != 6:
            self._build()

        print "Loading database...",
        sys.stdout.flush()
        # Just open the db:
        #self.db = shelve.open(self.dbFileName, 'r')
        with open(self.indexFileName, 'r') as f:
            self.index = pickle.load(f)

        self.sequence_index = FastaIndex(self.blastSequenceFileName)
        print 'done'
        sys.stdout.flush()
Пример #2
0
    def __init__(self, fastaFileName, options, rebuild=False):
        """
        Opens a premade local data base and builds an index of taxon names.
        """

        self.options = options
        self.fastaFileName = fastaFileName

        baseName, ext = os.path.splitext(os.path.basename(self.fastaFileName))
        self.baseName = baseName
        self.dbDirName = os.path.splitext(self.fastaFileName)[0] + '.sapdb'
        self.blastSequenceFileName = os.path.join(self.dbDirName, baseName + '.fasta')
        self.blastDB = os.path.join(self.dbDirName, baseName)
        #self.dbFileName = os.path.join(self.dbDirName, baseName + '.db')
        self.indexFileName = os.path.join(self.dbDirName, baseName + '.taxidx')
        self.summaryTreeFileName = os.path.join(self.dbDirName, baseName + '.html')

        # Build shelve database if not present:
        if rebuild or not os.path.exists(self.dbDirName) or len(glob.glob(os.path.join("%s/*" % self.dbDirName))) != 6:
            self._build()

        print "Loading database...",
        sys.stdout.flush()
        # Just open the db:
        #self.db = shelve.open(self.dbFileName, 'r')
        with open(self.indexFileName, 'r') as f:
            self.index = pickle.load(f)

        self.sequence_index = FastaIndex(self.blastSequenceFileName)
        print 'done'
        sys.stdout.flush()
Пример #3
0
    def _build(self):
        """
        Build a local database from a file of fasta entries.
        """

        print "Building native SAP database...",
        sys.stdout.flush()

        # Make a dir for the database files:
        if not os.path.exists(self.dbDirName):
            os.mkdir(self.dbDirName)

        # Write a newline char version to a tmp file:
        fileContent = readFile(self.fastaFileName)
        fileContent = re.sub(r'\r+', '\n', fileContent)
        #tmpFile, tmpFileName = tempfile.mkstemp()
        tmpFileName = "SAPPY.temp.fasta"
        writeFile(tmpFileName, fileContent)
        self.fastaFileName = tmpFileName
        print "\n"

        # # Create a new shelve:
        # db = shelve.open(self.dbFileName, 'n')

        # Create an iterator over fasta entries:
        fastaFile = open(self.fastaFileName, 'r')
        fastaIterator = Fasta.Iterator(fastaFile, parser=Fasta.RecordParser())
        print fastaFile
        #        exit (0)

        allowedLetters = IUPAC.IUPACAmbiguousDNA().letters

        # Initialize the index:
        index = {}

        # File to write sequences without gaps to for use as blast database:
        blastSequenceFile = open(self.blastSequenceFileName, 'w')

        taxonomySummary = Taxonomy.TaxonomySummary()

        for fastaRecord in fastaIterator:

            taxonomy = Taxonomy.Taxonomy()
            try:
                identifier, taxonomyString, organismName = re.split(
                    r'\s*;\s*', fastaRecord.title.strip())
                taxonomy.populateFromString(taxonomyString)
                taxonomy.organism = organismName

                #except (ValueError, Taxonomy.ParsingError) as exc:
                #raise WrongFormatError(fastaRecord.title)
                #if not (identifier and taxonomyString and organismName):
                #raise WrongFormatError(fastaRecord.title)

                # Add an entry to the index:
                for taxonomyLevel in taxonomy:
                    index.setdefault(taxonomyLevel.name, []).append(identifier)

                # taxonomySummary.addTaxonomy(taxonomy)

                # Uppercase and replace wildcard letters with Ns:
                fastaRecord.sequence = fastaRecord.sequence.upper()
                fastaRecord.sequence = re.sub('[^%s-]' % allowedLetters, 'N',
                                              fastaRecord.sequence)

                # Write the sequence without gaps to the blast file:
                fastaRecord.sequence = fastaRecord.sequence.replace('-', '')
                blastSequenceFile.write(str(fastaRecord))

            except:
                pass

        # remove tmp files:
        #os.close(tmpFile)
        os.unlink(tmpFileName)

        # Close the fasta file:
        fastaFile.close()

        # Pickle the index:
        indexFile = open(self.indexFileName, 'w')
        pickle.dump(index, indexFile)
        indexFile.close()

        # <h1>Taxonomic summary for local database: ''' + self.dbDirName + '</h1><div class="taxonomysummary"><pre>' + str(taxonomySummary) + "</pre></div></html>"
        #
        #         fh = open(self.summaryTreeFileName, 'w')
        #         fh.write(html)
        #         fh.close()

        # Format the blast database:
        blastSequenceFile.close()
        cmd = "makeblastdb -dbtype nucl -title %s -in %s" % (
            self.blastDB, self.blastSequenceFileName)

        cmd = self.escape(cmd)

        systemCall(cmd, stdout='IGNORE', stderr='IGNORE')

        # Create an index for the blastSequenceFile and pickle it:
        self.sequence_index = FastaIndex(self.blastSequenceFileName)

        # Make sure all the files have been written:
        import glob, time
        tries = 5
        while tries and len(glob.glob(os.path.join(
                "%s/*" % self.dbDirName))) != 7:
            time.sleep(1)
            tries -= 1

        print "done"
        sys.stdout.flush()
Пример #4
0
class DB(object):
    def __init__(self, fastaFileName, options, rebuild=False):
        """
        Opens a premade local data base and builds an index of taxon names.
        """

        self.options = options
        self.fastaFileName = fastaFileName

        baseName, ext = os.path.splitext(os.path.basename(self.fastaFileName))
        self.baseName = baseName
        self.dbDirName = os.path.splitext(self.fastaFileName)[0] + '.sapdb'
        self.blastSequenceFileName = os.path.join(self.dbDirName,
                                                  baseName + '.fasta')
        self.blastDB = os.path.join(self.dbDirName, baseName)
        #self.dbFileName = os.path.join(self.dbDirName, baseName + '.db')
        self.indexFileName = os.path.join(self.dbDirName, baseName + '.taxidx')
        self.summaryTreeFileName = os.path.join(self.dbDirName,
                                                baseName + '.html')

        # Build shelve database if not present:
        if rebuild or not os.path.exists(self.dbDirName) or len(
                glob.glob(os.path.join("%s/*" % self.dbDirName))) != 6:
            self._build()

        print "Loading database...",
        sys.stdout.flush()
        # Just open the db:
        #self.db = shelve.open(self.dbFileName, 'r')
        with open(self.indexFileName, 'r') as f:
            self.index = pickle.load(f)

        self.sequence_index = FastaIndex(self.blastSequenceFileName)
        print 'done'
        sys.stdout.flush()

    @staticmethod
    def escape(s):
        return s.replace('(', '\(').replace(')', '\)')

    def _build(self):
        """
        Build a local database from a file of fasta entries.
        """

        print "Building native SAP database...",
        sys.stdout.flush()

        # Make a dir for the database files:
        if not os.path.exists(self.dbDirName):
            os.mkdir(self.dbDirName)

        # Write a newline char version to a tmp file:
        fileContent = readFile(self.fastaFileName)
        fileContent = re.sub(r'\r+', '\n', fileContent)
        #tmpFile, tmpFileName = tempfile.mkstemp()
        tmpFileName = "SAPPY.temp.fasta"
        writeFile(tmpFileName, fileContent)
        self.fastaFileName = tmpFileName
        print "\n"

        # # Create a new shelve:
        # db = shelve.open(self.dbFileName, 'n')

        # Create an iterator over fasta entries:
        fastaFile = open(self.fastaFileName, 'r')
        fastaIterator = Fasta.Iterator(fastaFile, parser=Fasta.RecordParser())
        print fastaFile
        #        exit (0)

        allowedLetters = IUPAC.IUPACAmbiguousDNA().letters

        # Initialize the index:
        index = {}

        # File to write sequences without gaps to for use as blast database:
        blastSequenceFile = open(self.blastSequenceFileName, 'w')

        taxonomySummary = Taxonomy.TaxonomySummary()

        for fastaRecord in fastaIterator:

            taxonomy = Taxonomy.Taxonomy()
            try:
                identifier, taxonomyString, organismName = re.split(
                    r'\s*;\s*', fastaRecord.title.strip())
                taxonomy.populateFromString(taxonomyString)
                taxonomy.organism = organismName

                #except (ValueError, Taxonomy.ParsingError) as exc:
                #raise WrongFormatError(fastaRecord.title)
                #if not (identifier and taxonomyString and organismName):
                #raise WrongFormatError(fastaRecord.title)

                # Add an entry to the index:
                for taxonomyLevel in taxonomy:
                    index.setdefault(taxonomyLevel.name, []).append(identifier)

                # taxonomySummary.addTaxonomy(taxonomy)

                # Uppercase and replace wildcard letters with Ns:
                fastaRecord.sequence = fastaRecord.sequence.upper()
                fastaRecord.sequence = re.sub('[^%s-]' % allowedLetters, 'N',
                                              fastaRecord.sequence)

                # Write the sequence without gaps to the blast file:
                fastaRecord.sequence = fastaRecord.sequence.replace('-', '')
                blastSequenceFile.write(str(fastaRecord))

            except:
                pass

        # remove tmp files:
        #os.close(tmpFile)
        os.unlink(tmpFileName)

        # Close the fasta file:
        fastaFile.close()

        # Pickle the index:
        indexFile = open(self.indexFileName, 'w')
        pickle.dump(index, indexFile)
        indexFile.close()

        # <h1>Taxonomic summary for local database: ''' + self.dbDirName + '</h1><div class="taxonomysummary"><pre>' + str(taxonomySummary) + "</pre></div></html>"
        #
        #         fh = open(self.summaryTreeFileName, 'w')
        #         fh.write(html)
        #         fh.close()

        # Format the blast database:
        blastSequenceFile.close()
        cmd = "makeblastdb -dbtype nucl -title %s -in %s" % (
            self.blastDB, self.blastSequenceFileName)

        cmd = self.escape(cmd)

        systemCall(cmd, stdout='IGNORE', stderr='IGNORE')

        # Create an index for the blastSequenceFile and pickle it:
        self.sequence_index = FastaIndex(self.blastSequenceFileName)

        # Make sure all the files have been written:
        import glob, time
        tries = 5
        while tries and len(glob.glob(os.path.join(
                "%s/*" % self.dbDirName))) != 7:
            time.sleep(1)
            tries -= 1

        print "done"
        sys.stdout.flush()

    def search(self, fastaRecord, excludelist=[], usecache=True):

        # Write the query to a tmp file:
        tmpQueryFileIdent, tmpQueryFileName = tempfile.mkstemp()
        writeFile(tmpQueryFileName, str(fastaRecord))

        # File name used for blast cache
        fileSuffix = ''
        for name in excludelist:
            l = re.split(r'\s+', name)
            for n in l:
                fileSuffix += n[0]
        if fileSuffix:
            fileSuffix = '_' + fileSuffix

        blastFileName = os.path.join(
            self.options.blastcache,
            "%s.%d_%s%s.xml" % (fastaRecord.title, self.options.maxblasthits,
                                self.options.minsignificance, fileSuffix))

        if usecache and os.path.exists(
                blastFileName) and os.path.getsize(blastFileName) > 0:
            # Use cached blast result
            if excludelist:
                print "\n\t\tUsing cached Blast results (excluding %s)..." % ', '.join(
                    excludelist),
            else:
                print "\n\t\tUsing cached Blast results...",
            sys.stdout.flush()

            blastFile = open(blastFileName, 'r')
        else:
            if excludelist:
                print "\n\t\tSearching database (excluding %s)..." % ', '.join(
                    excludelist),
            else:
                print "\n\t\tSearching database...",
            sys.stdout.flush()

            if excludelist:
                # Build a blast db of the relevant records:
                excludeIDs = []
                for taxon in excludelist:
                    excludeIDs.extend(self.index[taxon])
                includeIDs = self.sequence_index.keys.difference(
                    set(excludeIDs))

                if not includeIDs:
                    print "done (database exhausted)\n\t\t\t",
                    sys.stdout.flush()
                    return SearchResult(None)

                blastDBfileName = os.path.join(self.options.project,
                                               "tmpBlastDB.fasta")
                tmpFastaFile = open(blastDBfileName, 'w')

                for key in includeIDs:
                    tmpFastaFile.write(str(self.sequence_index.get_entry(key)))
                tmpFastaFile.close()
                cmd = "makeblastdb -dbtype nucl -title %s -in %s" % (
                    blastDBfileName, blastDBfileName)
                #cmd = "xdformat -n -o %s %s" % (blastDBfileName, blastDBfileName)
                cmd = self.escape(cmd)
                systemCall(cmd, stdout='IGNORE', stderr='IGNORE')
            else:
                blastDBfileName = self.blastSequenceFileName
                #blastDBfileName = self.blastDB

            # Blast:
            if self.options.blastwordsize:
                wordSize = '-word_size %s' % self.options.blastwordsize
            else:
                wordSize = ''

            if self.options.nolowcomplexfilter:
                filterOption = '-dust no'
            else:
                filterOption = '-dust yes'  # FIXME: Check that this is an ok default... It is not the defalut in blastn

            cmd = 'blastn -db %s -outfmt 5 %s %s -evalue %s -max_target_seqs %s -query %s' \
                       % (blastDBfileName, wordSize, filterOption, self.options.minsignificance, self.options.maxblasthits,
                          tmpQueryFileName)

            #            cmd = "blastn %s -e %f -p blastn -d %s.fasta -i %s -m 7" % (wordSize, self.options.minsignificance, blastDBfileName, tmpQueryFileName)
            cmd = self.escape(cmd)

            STARTUPINFO = None
            if os.name == 'nt':
                STARTUPINFO = subprocess.STARTUPINFO()
                STARTUPINFO.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            proc = subprocess.Popen(cmd,
                                    shell=True,
                                    startupinfo=STARTUPINFO,
                                    stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE)
            stdout_value, stderr_value = proc.communicate()
            blastContent = str(stdout_value)

            #            stdin, stdout, stderr = os.popen3(cmd)

            for f in glob.glob(
                    os.path.join(self.options.project, 'tmpBlastDB.*')):
                os.remove(f)


#             blastContent = stdout.read()
#
#             stdout.close()
#             stdin.close()
#             stderr.close()

# This is a hack to remove an xml tag that just confuses things with blastn:
            blastContent = re.sub(r'<Hit_id>.*?</Hit_id>', '', blastContent)

            writeFile(blastFileName, blastContent)

            #blastFile = StringIO.StringIO(blastContent)
            blastFile = open(blastFileName, 'r')

        #blastRecord = NCBIXML.read(blastFile)
        try:
            blastRecord = NCBIXML.read(blastFile)
        except:
            blastRecord = None

        blastFile.close()

        os.close(tmpQueryFileIdent)
        os.unlink(tmpQueryFileName)

        print "done.\n\t\t\t",
        sys.stdout.flush()

        return SearchResult(blastRecord)

    def get(self, gi):

        try:
            record = self.sequence_index.get_entry(gi)
            taxonomy = Taxonomy.Taxonomy()
            try:
                identifier, taxonomyString, organismName = re.split(
                    r'\s*;\s*', record.title)
                taxonomy.populateFromString(taxonomyString)
                taxonomy.organism = organismName
            except (ValueError, Taxonomy.ParsingError) as exc:
                raise WrongFormatError(record.title)
            if not (identifier and taxonomyString and organismName):
                raise WrongFormatError(record.title)

            retrievalStatus = '(l)'
        except:
            retrievalStatus = '(l!?)'
            return None, retrievalStatus

        return Homology.Homologue(gi=gi,
                                  sequence=record.sequence,
                                  taxonomy=taxonomy), retrievalStatus
Пример #5
0
    def _build(self):
        """
        Build a local database from a file of fasta entries.
        """

        print "Building native SAP database...",
        sys.stdout.flush()

        # Make a dir for the database files:
        if not os.path.exists(self.dbDirName):
            os.mkdir(self.dbDirName)

        # Write a newline char version to a tmp file:
        fileContent = readFile(self.fastaFileName)
        fileContent = re.sub(r'\r+', '\n', fileContent)
        tmpFile, tmpFileName = tempfile.mkstemp()
        writeFile(tmpFileName, fileContent)
        self.fastaFileName = tmpFileName

        # # Create a new shelve:
        # db = shelve.open(self.dbFileName, 'n')

        # Create an iterator over fasta entries:
        fastaFile = open(self.fastaFileName, 'r')
        fastaIterator = Fasta.Iterator(fastaFile, parser=Fasta.RecordParser())        

        allowedLetters = IUPAC.IUPACAmbiguousDNA().letters

        # Initialize the index:
        index = {}

        # File to write sequences without gaps to for use as blast database:
        blastSequenceFile = open(self.blastSequenceFileName, 'w')

        taxonomySummary = Taxonomy.TaxonomySummary()

        for fastaRecord in fastaIterator:

            taxonomy = Taxonomy.Taxonomy()
            try:
                identifier, taxonomyString, organismName = re.split(r'\s*;\s*', fastaRecord.title.strip())
                taxonomy.populateFromString(taxonomyString)
                taxonomy.organism = organismName
            except (ValueError, Taxonomy.ParsingError) as exc:
                raise WrongFormatError(fastaRecord.title)
            if not (identifier and taxonomyString and organismName):
                raise WrongFormatError(fastaRecord.title)

            # Add an entry to the index:
            for taxonomyLevel in taxonomy:
                index.setdefault(taxonomyLevel.name, []).append(identifier)

            # taxonomySummary.addTaxonomy(taxonomy)

            # Uppercase and replace wildcard letters with Ns:
            fastaRecord.sequence = fastaRecord.sequence.upper()
            fastaRecord.sequence = re.sub('[^%s-]' % allowedLetters, 'N', fastaRecord.sequence)

            # Write the sequence without gaps to the blast file:
            fastaRecord.sequence = fastaRecord.sequence.replace('-', '')
            blastSequenceFile.write(str(fastaRecord))

        # remove tmp files:
        os.close(tmpFile)
        os.unlink(tmpFileName)

        # Close the fasta file:
        fastaFile.close()

        # Pickle the index:
        indexFile = open(self.indexFileName, 'w')
        pickle.dump(index, indexFile)
        indexFile.close()

# <h1>Taxonomic summary for local database: ''' + self.dbDirName + '</h1><div class="taxonomysummary"><pre>' + str(taxonomySummary) + "</pre></div></html>"
#
#         fh = open(self.summaryTreeFileName, 'w')
#         fh.write(html)
#         fh.close()

        # Format the blast database:
        blastSequenceFile.close()
        cmd = "makeblastdb -dbtype nucl -title %s -in %s" % (self.blastDB, self.blastSequenceFileName)

        cmd = self.escape(cmd)

        systemCall(cmd, stdout='IGNORE', stderr='IGNORE')

        # Create an index for the blastSequenceFile and pickle it:
        self.sequence_index = FastaIndex(self.blastSequenceFileName)

        # Make sure all the files have been written:
        import glob, time
        tries = 5
        while tries and len(glob.glob(os.path.join("%s/*" % self.dbDirName))) != 7:
            time.sleep(1)
            tries -= 1

        print "done"
        sys.stdout.flush()
Пример #6
0
class DB(object):

    def __init__(self, fastaFileName, options, rebuild=False):
        """
        Opens a premade local data base and builds an index of taxon names.
        """

        self.options = options
        self.fastaFileName = fastaFileName

        baseName, ext = os.path.splitext(os.path.basename(self.fastaFileName))
        self.baseName = baseName
        self.dbDirName = os.path.splitext(self.fastaFileName)[0] + '.sapdb'
        self.blastSequenceFileName = os.path.join(self.dbDirName, baseName + '.fasta')
        self.blastDB = os.path.join(self.dbDirName, baseName)
        #self.dbFileName = os.path.join(self.dbDirName, baseName + '.db')
        self.indexFileName = os.path.join(self.dbDirName, baseName + '.taxidx')
        self.summaryTreeFileName = os.path.join(self.dbDirName, baseName + '.html')

        # Build shelve database if not present:
        if rebuild or not os.path.exists(self.dbDirName) or len(glob.glob(os.path.join("%s/*" % self.dbDirName))) != 6:
            self._build()

        print "Loading database...",
        sys.stdout.flush()
        # Just open the db:
        #self.db = shelve.open(self.dbFileName, 'r')
        with open(self.indexFileName, 'r') as f:
            self.index = pickle.load(f)

        self.sequence_index = FastaIndex(self.blastSequenceFileName)
        print 'done'
        sys.stdout.flush()

    @staticmethod
    def escape(s):
        return s.replace('(', '\(').replace(')', '\)')


    def _build(self):
        """
        Build a local database from a file of fasta entries.
        """

        print "Building native SAP database...",
        sys.stdout.flush()

        # Make a dir for the database files:
        if not os.path.exists(self.dbDirName):
            os.mkdir(self.dbDirName)

        # Write a newline char version to a tmp file:
        fileContent = readFile(self.fastaFileName)
        fileContent = re.sub(r'\r+', '\n', fileContent)
        tmpFile, tmpFileName = tempfile.mkstemp()
        writeFile(tmpFileName, fileContent)
        self.fastaFileName = tmpFileName

        # # Create a new shelve:
        # db = shelve.open(self.dbFileName, 'n')

        # Create an iterator over fasta entries:
        fastaFile = open(self.fastaFileName, 'r')
        fastaIterator = Fasta.Iterator(fastaFile, parser=Fasta.RecordParser())        

        allowedLetters = IUPAC.IUPACAmbiguousDNA().letters

        # Initialize the index:
        index = {}

        # File to write sequences without gaps to for use as blast database:
        blastSequenceFile = open(self.blastSequenceFileName, 'w')

        taxonomySummary = Taxonomy.TaxonomySummary()

        for fastaRecord in fastaIterator:

            taxonomy = Taxonomy.Taxonomy()
            try:
                identifier, taxonomyString, organismName = re.split(r'\s*;\s*', fastaRecord.title.strip())
                taxonomy.populateFromString(taxonomyString)
                taxonomy.organism = organismName
            except (ValueError, Taxonomy.ParsingError) as exc:
                raise WrongFormatError(fastaRecord.title)
            if not (identifier and taxonomyString and organismName):
                raise WrongFormatError(fastaRecord.title)

            # Add an entry to the index:
            for taxonomyLevel in taxonomy:
                index.setdefault(taxonomyLevel.name, []).append(identifier)

            # taxonomySummary.addTaxonomy(taxonomy)

            # Uppercase and replace wildcard letters with Ns:
            fastaRecord.sequence = fastaRecord.sequence.upper()
            fastaRecord.sequence = re.sub('[^%s-]' % allowedLetters, 'N', fastaRecord.sequence)

            # Write the sequence without gaps to the blast file:
            fastaRecord.sequence = fastaRecord.sequence.replace('-', '')
            blastSequenceFile.write(str(fastaRecord))

        # remove tmp files:
        os.close(tmpFile)
        os.unlink(tmpFileName)

        # Close the fasta file:
        fastaFile.close()

        # Pickle the index:
        indexFile = open(self.indexFileName, 'w')
        pickle.dump(index, indexFile)
        indexFile.close()

# <h1>Taxonomic summary for local database: ''' + self.dbDirName + '</h1><div class="taxonomysummary"><pre>' + str(taxonomySummary) + "</pre></div></html>"
#
#         fh = open(self.summaryTreeFileName, 'w')
#         fh.write(html)
#         fh.close()

        # Format the blast database:
        blastSequenceFile.close()
        cmd = "makeblastdb -dbtype nucl -title %s -in %s" % (self.blastDB, self.blastSequenceFileName)

        cmd = self.escape(cmd)

        systemCall(cmd, stdout='IGNORE', stderr='IGNORE')

        # Create an index for the blastSequenceFile and pickle it:
        self.sequence_index = FastaIndex(self.blastSequenceFileName)

        # Make sure all the files have been written:
        import glob, time
        tries = 5
        while tries and len(glob.glob(os.path.join("%s/*" % self.dbDirName))) != 7:
            time.sleep(1)
            tries -= 1

        print "done"
        sys.stdout.flush()

    def search(self, fastaRecord, excludelist=[], usecache=True):

        # Write the query to a tmp file:
        tmpQueryFileIdent, tmpQueryFileName = tempfile.mkstemp()
        writeFile(tmpQueryFileName, str(fastaRecord))

        # File name used for blast cache
        fileSuffix = ''
        for name in excludelist:
            l = re.split(r'\s+', name)
            for n in l:
                fileSuffix += n[0]
        if fileSuffix:
           fileSuffix = '_' + fileSuffix

        blastFileName = os.path.join(self.options.blastcache, "%s.%d_%s%s.xml" % (fastaRecord.title,
                                               self.options.maxblasthits, self.options.minsignificance, fileSuffix))

        if usecache and os.path.exists(blastFileName) and os.path.getsize(blastFileName)>0:
            # Use cached blast result
            if excludelist:
               print "\n\t\tUsing cached Blast results (excluding %s)..." % ', '.join(excludelist),
            else:
                print "\n\t\tUsing cached Blast results...", 
            sys.stdout.flush()

            blastFile = open(blastFileName, 'r')
        else:
            if excludelist:
                print "\n\t\tSearching database (excluding %s)..." % ', '.join(excludelist), 
            else:
                print "\n\t\tSearching database...", 
            sys.stdout.flush()

            if excludelist:
                # Build a blast db of the relevant records:
                excludeIDs = []
                for taxon in excludelist:
                    excludeIDs.extend(self.index[taxon])
                includeIDs = self.sequence_index.keys.difference(set(excludeIDs))

                if not includeIDs:
                    print "done (database exhausted)\n\t\t\t",
                    sys.stdout.flush()       
                    return SearchResult(None)

                blastDBfileName = os.path.join(self.options.project, "tmpBlastDB.fasta")
                tmpFastaFile = open(blastDBfileName, 'w')

                for key in includeIDs:
                    tmpFastaFile.write(str(self.sequence_index.get_entry(key)))
                tmpFastaFile.close()
                cmd = "makeblastdb -dbtype nucl -title %s -in %s" % (blastDBfileName, blastDBfileName)
                #cmd = "xdformat -n -o %s %s" % (blastDBfileName, blastDBfileName)
                cmd = self.escape(cmd)
                systemCall(cmd, stdout='IGNORE', stderr='IGNORE')
            else:
                blastDBfileName = self.blastSequenceFileName
                #blastDBfileName = self.blastDB
            
            # Blast:
            if self.options.blastwordsize:
                wordSize = '-word_size %s' % self.options.blastwordsize
            else:
                wordSize = ''

            if self.options.nolowcomplexfilter:
                filterOption = '-dust no'
            else:
                filterOption = '-dust yes' # FIXME: Check that this is an ok default... It is not the defalut in blastn

            cmd = 'blastn -db %s -outfmt 5 %s %s -evalue %s -max_target_seqs %s -query %s' \
                       % (blastDBfileName, wordSize, filterOption, self.options.minsignificance, self.options.maxblasthits,
                          tmpQueryFileName)
               
#            cmd = "blastn %s -e %f -p blastn -d %s.fasta -i %s -m 7" % (wordSize, self.options.minsignificance, blastDBfileName, tmpQueryFileName)
            cmd = self.escape(cmd)

            STARTUPINFO = None
            if os.name == 'nt':
                STARTUPINFO = subprocess.STARTUPINFO()
                STARTUPINFO.dwFlags |= subprocess.STARTF_USESHOWWINDOW
            proc = subprocess.Popen(cmd, shell=True, startupinfo=STARTUPINFO,
                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            stdout_value, stderr_value = proc.communicate()
            blastContent = str(stdout_value)

#            stdin, stdout, stderr = os.popen3(cmd)

            for f in glob.glob(os.path.join(self.options.project, 'tmpBlastDB.*')):
                os.remove(f)

#             blastContent = stdout.read()
# 
#             stdout.close()
#             stdin.close()
#             stderr.close()

            # This is a hack to remove an xml tag that just confuses things with blastn:
            blastContent = re.sub(r'<Hit_id>.*?</Hit_id>', '', blastContent)

            writeFile(blastFileName, blastContent)

            #blastFile = StringIO.StringIO(blastContent)
            blastFile = open(blastFileName, 'r')

        #blastRecord = NCBIXML.read(blastFile)
        try:
            blastRecord = NCBIXML.read(blastFile)
        except:
            blastRecord = None

        blastFile.close()

        os.close(tmpQueryFileIdent)
        os.unlink(tmpQueryFileName)

        print "done.\n\t\t\t",
        sys.stdout.flush()
        
        return SearchResult(blastRecord)
        
    def get(self, gi):

        try:
            record = self.sequence_index.get_entry(gi)
            taxonomy = Taxonomy.Taxonomy()
            try:
                identifier, taxonomyString, organismName = re.split(r'\s*;\s*', record.title)
                taxonomy.populateFromString(taxonomyString)
                taxonomy.organism = organismName
            except (ValueError, Taxonomy.ParsingError) as exc:
                raise WrongFormatError(record.title)
            if not (identifier and taxonomyString and organismName):
                raise WrongFormatError(record.title)

            retrievalStatus = '(l)'
        except:
            retrievalStatus = '(l!?)'
            return None, retrievalStatus

        return Homology.Homologue(gi=gi,
                                  sequence=record.sequence,
                                  taxonomy=taxonomy), retrievalStatus