Exemplo n.º 1
0
 def _generate(self, input_seq, n_seqs=1, show_score=False):
     """Select the highest (or lowest) scoring variants of one input sequence.

     Candidate variants are built with ``self._find_key_positions`` and
     ``self._replace``, scored with ``predict(..., mode='decision_function')``
     and ranked by that score.

     Parameters
     ----------
     input_seq : pair
         Unpacked as ``(header, seq)``; only ``seq`` is used here.

     n_seqs : int (default: 1)
         Number of candidates to keep. ``0`` keeps none.

     show_score : bool (default: False)
         If True each kept candidate is paired with its score.

     Returns
     -------
     selected : numpy array, or ``zip(scores, candidates)`` if show_score
         Highest score first when ``self.enhance`` is truthy, lowest score
         first otherwise. Note that ``zip`` is a list on Python 2 and a
         one-shot iterator on Python 3.
     """
     _, seq = input_seq
     # find best/worst n_differences positions
     seq_items, n_differences_ids = self._find_key_positions(seq)
     # replace all possible kmers of size n_differences
     gen_seqs = list(self._replace(seq_items, n_differences_ids))
     # score every candidate
     preds = predict(iterable=gen_seqs,
                     estimator=self.estimator,
                     vectorizer=self.vectorizer,
                     mode='decision_function',
                     n_blocks=5,
                     block_size=None,
                     n_jobs=self.n_jobs)
     # argsort orders candidate indices from lowest to highest score
     sorted_pred_ids = np.argsort(preds)
     if self.enhance:
         # Reverse before truncating. Slicing with [-n_seqs:] would return
         # *all* candidates for n_seqs == 0, because -0 == 0.
         sorted_pred_ids = sorted_pred_ids[::-1]
     n_seqs_ids = sorted_pred_ids[:n_seqs]
     selected = np.array(gen_seqs)[n_seqs_ids]
     if show_score:
         return zip(np.array(preds)[n_seqs_ids], selected)
     return selected
Exemplo n.º 2
0
 def _generate(self, input_seq, n_seqs=1, show_score=False):
     """Return the ``n_seqs`` best or worst scoring variants of ``input_seq``.

     Variants come from ``self._replace`` applied at the positions chosen by
     ``self._find_key_positions``; each one is scored through
     ``predict(..., mode='decision_function')``.

     Parameters
     ----------
     input_seq : pair
         A two-item value unpacked as ``(header, seq)``. The header is not
         used by this method.

     n_seqs : int (default: 1)
         How many variants to return. With ``0`` nothing is returned,
         regardless of ``self.enhance``.

     show_score : bool (default: False)
         If True the result pairs each score with its variant.

     Returns
     -------
     variants : numpy array, or ``zip(scores, variants)`` if show_score
         Ordered from the highest score down when ``self.enhance`` is truthy,
         from the lowest score up otherwise.
     """
     _, seq = input_seq
     # find best/worst n_differences positions
     seq_items, n_differences_ids = self._find_key_positions(seq)
     # replace all possible kmers of size n_differences
     gen_seqs = list(self._replace(seq_items, n_differences_ids))
     preds = predict(iterable=gen_seqs,
                     estimator=self.estimator,
                     vectorizer=self.vectorizer,
                     mode='decision_function',
                     n_blocks=5,
                     block_size=None,
                     n_jobs=self.n_jobs)
     # ascending by score; flipped below when the best variants are wanted
     ranking = np.argsort(preds)
     if self.enhance:
         # Flip, then take a prefix. The earlier ``[-n_seqs:]`` suffix slice
         # degenerated to the full array when n_seqs was 0 (-0 == 0).
         ranking = ranking[::-1]
     n_seqs_ids = ranking[:n_seqs]
     chosen = np.array(gen_seqs)[n_seqs_ids]
     if not show_score:
         return chosen
     return zip(np.array(preds)[n_seqs_ids], chosen)
Exemplo n.º 3
0
    def sample(self, seqs, n_seqs=1, show_score=False, enhance=None, n_differences=None):
        """Yield generated variants for every input sequence.

        This is a generator: nothing below runs, including the state
        overrides, until the result is first iterated.

        Parameters
        ----------
        seqs : iterable
            Input items, each handed unchanged to ``self._generate``. When
            show_score is True, ``item[0]`` and ``item[1]`` are also written
            to the debug log (presumably header and sequence text).

        n_seqs : int (default: 1)
            Number of variants requested from ``self._generate`` per input.

        show_score : bool (default: False)
            Forwarded to ``self._generate``. If True the score of each
            unmodified input is additionally computed and logged at debug
            level.

        enhance : bool (default None)
            If not None, stored on ``self.enhance`` and kept for later calls.
            If None the value already stored on the instance is used.

        n_differences : int (default None)
            If not None, stored on ``self.n_differences`` and kept for later
            calls. If None the value already stored on the instance is used.

        Yields
        ------
        Whatever ``self._generate`` produces for each input, one element at
        a time and in input order.
        """
        # explicit arguments replace the stored configuration
        if enhance is not None:
            self.enhance = enhance
        if n_differences is not None:
            self.n_differences = n_differences

        for item in seqs:
            if show_score:
                # score the untouched input so the log shows the baseline
                baseline = predict(iterable=[item],
                                   estimator=self.estimator,
                                   vectorizer=self.vectorizer,
                                   mode='decision_function',
                                   n_blocks=5,
                                   block_size=None,
                                   n_jobs=self.n_jobs)
                message = '%s\n%+.3f %s' % (item[0], baseline[0], item[1])
                logger.debug(message)
            variants = self._generate(item, n_seqs=n_seqs, show_score=show_score)
            for variant in variants:
                yield variant
Exemplo n.º 4
0
    def sample(self,
               seqs,
               n_seqs=1,
               show_score=False,
               enhance=None,
               n_differences=None):
        """Lazily produce variants of each input via ``self._generate``.

        Because the body contains ``yield``, calling this method only creates
        a generator; the configuration overrides and all scoring happen once
        iteration begins.

        Parameters
        ----------
        seqs : iterable
            Inputs to process. Each one is passed as-is to
            ``self._generate``; with show_score its elements 0 and 1 are
            also formatted into a debug log line.

        n_seqs : int (default: 1)
            Number of variants to request for each input.

        show_score : bool (default: False)
            Passed through to ``self._generate``. When True, the
            decision-function score of every original input is logged too.

        enhance : bool (default None)
            When given, overwrites ``self.enhance`` (the change persists
            after this call). When None the stored value is left untouched.

        n_differences : int (default None)
            When given, overwrites ``self.n_differences`` (the change
            persists after this call). When None the stored value is left
            untouched.

        Yields
        ------
        The elements returned by ``self._generate`` for each input, in order.
        """
        if enhance is not None:
            self.enhance = enhance
        if n_differences is not None:
            self.n_differences = n_differences

        for entry in seqs:
            if show_score:
                # same scoring settings as used for the generated variants
                scoring_options = dict(estimator=self.estimator,
                                       vectorizer=self.vectorizer,
                                       mode='decision_function',
                                       n_blocks=5,
                                       block_size=None,
                                       n_jobs=self.n_jobs)
                entry_scores = predict(iterable=[entry], **scoring_options)
                logger.debug('%s\n%+.3f %s' % (entry[0], entry_scores[0], entry[1]))
            produced = self._generate(entry, n_seqs=n_seqs, show_score=show_score)
            for result in produced:
                yield result