Exemplo n.º 1
0
    def __getitem__(self, i):
        """Return the i-th instance, optionally paired with a sampled partner.

        Without random pairing, the i-th sequence is passed through
        ``preprocess_seq_for_rnn`` with ``augment=False`` and returned alone
        (RNN mode only; the non-RNN case terminates via ``sys.exit``).

        With random pairing, a partner sequence is drawn: roughly half the
        time from the anchor's own family (``self.structs[anchor]``),
        otherwise from a randomly chosen family that differs from it (any
        family is accepted when ``self.sanity_check`` is truthy). When
        ``self.max_len`` is set, a pair is redrawn until
        ``len(sequence0) + len(sequence1) + 3 <= self.max_len``; after every
        ten rejected draws the anchor advances to the next sequence,
        wrapping around to the start of the dataset instead of running off
        its end.

        Args:
            i: index of the anchor sequence.

        Returns:
            RNN mode without pairing: the output of ``preprocess_seq_for_rnn``.
            RNN mode with pairing: ``(instance0, instance1, is_samefamily)``.
            Non-RNN mode with pairing: the unpacked output of
            ``preprocess_seq_for_tfm`` followed by ``is_samefamily``.
            ``is_samefamily`` is a one-element ``torch`` tensor.

        Raises:
            IndexError: if ``i`` is out of range, or if no pair fits within
                ``self.max_len`` after ten draws for every anchor.
            SystemExit: non-RNN mode without random pairing.
        """
        if not self.random_pairing:
            if not self.rnn:
                sys.exit("PLUS-TFM Pfam pre-training without random pairing is not supported")
            return preprocess_seq_for_rnn(self.sequences[i], self.num_alphabets, self.cfg, augment=False)

        num_sequences = len(self.sequences)
        # Ten draws per anchor, every anchor visited at most once.
        max_trials = 10 * num_sequences
        trial = 0
        while True:
            offset = trial // 10
            # The first anchor is `i` itself so an out-of-range index still
            # raises IndexError; only later anchors wrap around.
            anchor = i if offset == 0 else (i + offset) % num_sequences
            sequence0 = self.sequences[anchor]

            is_samefamily = random.random() > 0.5
            if is_samefamily:
                family = self.structs[anchor]
            else:
                while True:
                    family = random.randint(0, len(self.structs_idx) - 1)
                    if family != self.structs[anchor] or self.sanity_check:
                        break
            sequence1 = self.sequences[random.choice(self.structs_idx[family])]

            # NOTE(review): the +3 presumably reserves room for special
            # tokens added during preprocessing -- confirm against the helper.
            if self.max_len is None or len(sequence0) + len(sequence1) + 3 <= self.max_len:
                break

            trial += 1
            if trial >= max_trials:
                raise IndexError("no sequence pair fits within max_len")

        # Built once, after a pair has been accepted.
        is_samefamily = torch.tensor([is_samefamily])

        if self.rnn:
            instance0 = preprocess_seq_for_rnn(sequence0, self.num_alphabets, self.cfg)
            instance1 = preprocess_seq_for_rnn(sequence1, self.num_alphabets, self.cfg)
            return instance0, instance1, is_samefamily

        instance = preprocess_seq_for_tfm(sequence0, sequence1, self.num_alphabets, self.cfg, self.max_len)
        return (*instance, is_samefamily)
Exemplo n.º 2
0
 def __getitem__(self, i):
     """Return the i-th sequence pair, preprocessed, with its label.

     In RNN mode each sequence goes through ``preprocess_seq_for_rnn``
     separately and the result is ``(instance0, instance1, label)``.
     Otherwise both sequences go through a single
     ``preprocess_seq_for_tfm`` call whose output is unpacked and followed
     by the label.
     """
     first = self.sequences0[i]
     second = self.sequences1[i]
     label = self.labels[i]

     if not self.rnn:
         joint = preprocess_seq_for_tfm(
             first, second, self.num_alphabets, self.cfg, self.max_len, self.augment
         )
         return (*joint, label)

     encoded_first = preprocess_seq_for_rnn(first, self.num_alphabets, self.cfg, self.augment)
     encoded_second = preprocess_seq_for_rnn(second, self.num_alphabets, self.cfg, self.augment)
     return encoded_first, encoded_second, label
Exemplo n.º 3
0
 def __getitem__(self, i):
     """Return the i-th preprocessed sequence together with its label(s).

     In RNN mode the result is ``(instance, self.labels[i])``. Otherwise
     the sequence goes through ``preprocess_seq_for_tfm`` (with ``None`` as
     the second sequence) and its unpacked output is followed by either
     the raw label, when ``self.valids`` is ``None``, or the unpacked
     output of ``preprocess_label_for_tfm``.
     """
     if self.rnn:
         encoded = preprocess_seq_for_rnn(
             self.sequences[i], self.num_alphabets, self.cfg, self.augment
         )
         return encoded, self.labels[i]

     encoded = preprocess_seq_for_tfm(
         self.sequences[i], None, self.num_alphabets, self.cfg, self.max_len, self.augment
     )
     if self.valids is None:
         return (*encoded, self.labels[i])

     encoded_labels = preprocess_label_for_tfm(self.labels[i], self.valids[i], self.max_len)
     return (*encoded, *encoded_labels)
Exemplo n.º 4
0
 def __getitem__(self, i):
     """Return the i-th sequence pair, its similarity level and optional cmaps.

     The pair is preprocessed per sequence with ``preprocess_seq_for_rnn``
     in RNN mode, or jointly with ``preprocess_seq_for_tfm`` otherwise. The
     returned tuple is the preprocessed values, then ``self.labels[i]``,
     then ``self.cmaps0[i]`` and ``self.cmaps1[i]`` when ``self.cmaps0`` is
     not ``None``.
     """
     first = self.sequences0[i]
     second = self.sequences1[i]
     level = self.labels[i]

     if self.rnn:
         encoded = (
             preprocess_seq_for_rnn(first, self.num_alphabets, self.cfg, self.augment),
             preprocess_seq_for_rnn(second, self.num_alphabets, self.cfg, self.augment),
         )
     else:
         encoded = preprocess_seq_for_tfm(
             first, second, self.num_alphabets, self.cfg, self.max_len, self.augment
         )

     if self.cmaps0 is None:
         return (*encoded, level)
     return (*encoded, level, self.cmaps0[i], self.cmaps1[i])
Exemplo n.º 5
0
    def __getitem__(self, k):
        """Return the sequence pair addressed by the flat index ``k``.

        ``k`` is split into a row ``k // n`` and a column ``k % n`` with
        ``n = len(self.sequences)``, selecting the two sequences and the
        entry ``self.labels[row, col]``. The pair is preprocessed per
        sequence with ``preprocess_seq_for_rnn`` in RNN mode, or jointly
        with ``preprocess_seq_for_tfm`` otherwise. The returned tuple is the
        preprocessed values, then the label, then ``self.cmaps[row]`` and
        ``self.cmaps[col]`` when ``self.cmaps`` is not ``None``.
        """
        row, col = divmod(k, len(self.sequences))
        first = self.sequences[row]
        second = self.sequences[col]
        level = self.labels[row, col]

        if self.rnn:
            encoded = (
                preprocess_seq_for_rnn(first, self.num_alphabets, self.cfg),
                preprocess_seq_for_rnn(second, self.num_alphabets, self.cfg),
            )
        else:
            encoded = preprocess_seq_for_tfm(
                first, second, self.num_alphabets, self.cfg, self.max_len
            )

        if self.cmaps is None:
            return (*encoded, level)
        return (*encoded, level, self.cmaps[row], self.cmaps[col])
Exemplo n.º 6
0
    def __getitem__(self, i):
        """Return the i-th sequence, preprocessed with augmentation disabled.

        Uses ``preprocess_seq_for_rnn`` in RNN mode and
        ``preprocess_seq_for_tfm`` (with ``None`` as the second sequence)
        otherwise; the helper's output is returned unchanged.
        """
        if self.rnn:
            return preprocess_seq_for_rnn(
                self.sequences[i], self.num_alphabets, self.cfg, augment=False
            )
        return preprocess_seq_for_tfm(
            self.sequences[i], None, self.num_alphabets, self.cfg, self.max_len, augment=False
        )