Exemplo n.º 1
0
 def add(self, header, sequence, force=False):
     """Stage a sequence for insertion unless its hash is already indexed.

     :param header: Sequence header
     :param sequence: Sequence; its ``hashSequence`` value is the lookup key
     :param force: Accepted for callers, but not read by this method
     """
     digest = hashSequence(sequence)
     if digest not in self.data:
         # Unknown sequence: queue it together with its hash.
         self._buffer.append((header, sequence, digest))
         return
     logging.info(
         f"blast::add:Bouncing {header}, sequence already stored")
Exemplo n.º 2
0
 def _remove_from_mfasta(self):
     """Re-read the current multifasta and stage every record to keep.

     Each record yielded by ``zFastaReader(self.fastaBufferFile)`` whose
     ``">"``-prefixed header is not listed in ``self._delete_buffer`` is
     appended to ``self._buffer`` as ``(header, sequence, key)``, so the
     buffered sequences can then be rewritten as a multifasta.
     """
     # A set keeps the per-record membership test O(1).
     to_delete_headers = {buf[0] for buf in self._delete_buffer}
     for header, seq, _id in zFastaReader(self.fastaBufferFile):
         # Delete-buffer headers are compared with the ">" prefix attached.
         _header = f">{header}"
         if _header in to_delete_headers:
             continue
         # Keep this record; only staged sequences need to be hashed.
         self._buffer.append((_header, seq, hashSequence(seq)))
Exemplo n.º 3
0
    def remove(self, header: str, sequence: str):
        """Remove sequence from blast database

        The sequence is looked up by its hash in ``self.data``. When it is
        present, ``(header, key)`` is queued in ``self._delete_buffer``;
        otherwise a warning is logged. In both cases the multifasta is then
        re-read through ``self._remove_from_mfasta()``.

        :param header: Sequence header
        :type header: str
        :param sequence: Nucleotide sequence
        :type sequence: str
        """
        key = hashSequence(sequence)
        if key not in self.data:
            # logging.warn is a deprecated alias; logging.warning is the
            # supported spelling.
            logging.warning(
                f"blast::remove:Fasta sequence {header} doesn't exist in blast database"
            )
        else:
            self._delete_buffer.append((header, key))

        self._remove_from_mfasta()
Exemplo n.º 4
0
    def _index(self):
        """Return the sequence-hash index for ``self.fastaFile``.

        If ``self.registry['pkl']`` is truthy, the index is obtained from
        ``self._restoreIndex`` using that entry under ``self.location``.
        Otherwise the fasta file is parsed and every record id is stored
        under the hash of its sequence string; a later record with the same
        hash replaces the earlier header.

        :return: the value of ``_restoreIndex``, or a dict mapping
            ``hashSequence(str(record.seq))`` to ``record.id``
        """
        pickled = self.registry['pkl']
        if pickled:
            return self._restoreIndex(f"{self.location}/{pickled}")

        logging.info(f"Building index on {self.fastaFile}, "
                     f" this may take a while...")
        with zFile(self.fastaFile) as handle:
            index = {
                hashSequence(str(record.seq)): record.id
                for record in SeqIO.parse(handle, "fasta")
            }

        logging.info(f"{len(index)} fasta records successfully indexed")
        return index
Exemplo n.º 5
0
 def get(self, **kwargs):
     """Look up an entry of ``self`` from keyword criteria.

     :param seq: (keyword) sequence; its ``hashSequence`` value is used as
         the key into ``self``
     :return: ``self[hashSequence(seq)]`` when the ``seq`` keyword is given
     """
     if 'seq' in kwargs:
         # The value lives in kwargs; there is no local named ``seq``.
         return self[hashSequence(kwargs['seq'])]