示例#1
0
    def test_create_kmers_from_sequence(self):
        """Check k-mer extraction for a sequence longer than k and for one shorter than k."""
        kmers = KmerHelper.create_kmers_from_sequence(ReceptorSequence(amino_acid_sequence="ABCDEFG"), 3,
                                                      sequence_type=SequenceType.AMINO_ACID)
        # Assert each expected 3-mer on its own so that a failure names the missing k-mer
        # instead of reporting a bare "False is not true".
        for expected_kmer in ("ABC", "BCD", "CDE", "DEF", "EFG"):
            self.assertIn(expected_kmer, kmers)
        self.assertEqual(5, len(kmers))

        # A sequence shorter than k is expected to produce no k-mers at all.
        kmers = KmerHelper.create_kmers_from_sequence(ReceptorSequence(amino_acid_sequence="AB"), 3,
                                                      sequence_type=SequenceType.AMINO_ACID)
        self.assertEqual(0, len(kmers))
示例#2
0
    def encode_sequence(sequence: ReceptorSequence, params: EncoderParams):
        """
        Split a receptor sequence into k-mers.

        Args:
            sequence: ReceptorSequence object whose k-mers are extracted
            params: EncoderParams object; params.model["k"] is the k-mer length and the optional
                params.model["sequence_type"] entry (None when absent) is passed on both when reading
                the sequence and when creating the k-mers

        Returns:
            the result of KmerHelper.create_kmers_from_sequence for this sequence, or None (after a
            warning is logged) when the sequence is shorter than k

        """
        model = params.model
        k = model["k"]
        sequence_type = model.get('sequence_type', None)
        length = len(sequence.get_sequence(sequence_type))

        if length < k:
            # too short to contain even a single k-mer: report it and hand back None
            logging.warning(f'KmerSequenceEncoder: Sequence length {length} is less than {k}. Ignoring sequence...')
            kmers = None
        else:
            kmers = KmerHelper.create_kmers_from_sequence(sequence=sequence, k=k, sequence_type=sequence_type)

        return kmers
    def _encode_repertoire(self, repertoire, vectors):
        """
        Sum the vectors of all k-mers of all sequences in a repertoire into one vector.

        Args:
            repertoire: object whose `sequences` attribute is iterated; each item is split into
                k-mers of length self.k
            vectors: object exposing `vector_size` and `get_vector(kmer)`
                (presumably a trained word-vector model -- confirm against the caller)

        Returns:
            numpy array created with np.zeros(vectors.vector_size) to which the per-sequence
            sums of k-mer vectors have been added
        """
        size = vectors.vector_size
        total = np.zeros(size)

        for receptor_sequence in repertoire.sequences:
            # k-mer vectors are first summed per sequence, then the sequence sum is added to the total
            per_sequence = np.zeros(size)
            for kmer in KmerHelper.create_kmers_from_sequence(sequence=receptor_sequence, k=self.k):
                try:
                    embedding = vectors.get_vector(kmer)
                except KeyError:
                    # no vector is available for this k-mer, so it contributes nothing
                    continue
                per_sequence = np.add(per_sequence, embedding)

            total = np.add(total, per_sequence)

        return total
示例#4
0
    def encode_sequence(sequence: ReceptorSequence, params: EncoderParams):
        """
        Splits a receptor sequence into k-mers of length params.model["k"].

        :param sequence: ReceptorSequence to split into k-mers
        :param params: EncoderParams (only params.model["k"] is read here)
        :return: the result of KmerHelper.create_kmers_from_sequence(sequence, k), or None
                 (after a warning is logged) when the sequence is shorter than k
        """
        kmer_length = params.model["k"]
        too_short = len(sequence.get_sequence()) < kmer_length

        if too_short:
            # not even one k-mer fits into the sequence
            logging.warning('KmerSequenceEncoder: Sequence length is less than k. Ignoring sequence...')
            return None

        return KmerHelper.create_kmers_from_sequence(sequence, kmer_length)