def encode_sequence(sequence: ReceptorSequence, params: EncoderParams):
    """
    Build string features from the IMGT k-mers of a single sequence.

    The k-mers come from KmerHelper.create_IMGT_kmers_from_sequence; every
    component of each k-mer entry is converted to str and the components are
    joined with Constants.FEATURE_DELIMITER to form one feature string.

    :param sequence: ReceptorSequence to split into k-mers
    :param params: EncoderParams; params.model["k"] (required) is the k-mer
        length and params.model["sequence_type"] (optional, defaults to None)
        is forwarded to both sequence.get_sequence and KmerHelper
    :return: list of feature strings, one per k-mer, or None (after logging
        a warning) when the sequence is shorter than k
    """
    kmer_size = params.model["k"]
    seq_type = params.model.get('sequence_type')

    # A sequence shorter than k cannot be encoded: warn and skip it.
    if len(sequence.get_sequence(seq_type)) < kmer_size:
        logging.warning('Sequence length is less than k. Ignoring sequence')
        return None

    imgt_kmers = KmerHelper.create_IMGT_kmers_from_sequence(
        sequence=sequence, k=kmer_size, sequence_type=seq_type)

    features = []
    for parts in imgt_kmers:
        # Each entry is an iterable of components; stringify and join them.
        features.append(Constants.FEATURE_DELIMITER.join(map(str, parts)))
    return features
Example #2
0
 def test_create_IMGT_kmers_from_sequence(self):
     """
     Check that every 3-mer of the amino-acid sequence "CASSRYUF" is returned
     by KmerHelper.create_IMGT_kmers_from_sequence as a (k-mer, number) pair.

     NOTE(review): the numbers are presumably the IMGT position of each
     k-mer's first residue -- confirm against KmerHelper.
     """
     kmers = KmerHelper.create_IMGT_kmers_from_sequence(
         ReceptorSequence("CASSRYUF"), 3, sequence_type=SequenceType.AMINO_ACID)

     expected_kmers = (
         ("CAS", 105),
         ("ASS", 106),
         ("SSR", 107),
         ("SRY", 108),
         ("RYU", 114),
         ("YUF", 115),
     )
     # assertIn reports the missing pair and the container on failure,
     # unlike assertTrue(x in y), which only reports "False is not true".
     for expected in expected_kmers:
         self.assertIn(expected, kmers)