Пример #1
0
 def get_inds(self, seq):
     """Return the combined in-motif indexes of ``seq`` for both motif types.

     Two ``motif_seq.LcSeq`` objects are built over the same sequence and
     ``self.k``: one from ``self.lca`` tagged ``'lca'`` and one from
     ``self.lce`` tagged ``'lce'``. Each ``_get_motif_indexes()`` call
     yields a pair; only the first element of each pair is used, and the
     two are merged with ``.union`` (presumably sets of positions; verify
     against ``LcSeq``).
     """
     lca_motifs = motif_seq.LcSeq(seq, self.k, self.lca, 'lca')
     lce_motifs = motif_seq.LcSeq(seq, self.k, self.lce, 'lce')
     # The second element of each pair (indexes outside motifs) is not needed.
     inside_lca, _ = lca_motifs._get_motif_indexes()
     inside_lce, _ = lce_motifs._get_motif_indexes()
     return inside_lca.union(inside_lce)
Пример #2
0
 def in_out_kappa(self):
     """Scatter-plot charge asymmetry against net charge outside LC motifs.

     Loads the tab-separated table at ``self.train_fpi`` and keeps the rows
     with ``y == 0``. For each value in the ``Sequence`` column an
     ``LcSeq`` is built with ``self.k`` and ``self.lca``; the sequence is
     used only when ``overlapping_kmer_in_motif()`` returns more than 20
     entries in its first element and the ``KappaKmers`` object built from
     the outside k-mers and outside sequence has ``FCR() > 0.1``.

     For every sequence that passes, the outside sequence and its
     ``deltaForm()`` value are printed, and the NCPR / delta pair becomes
     one point of the scatter plot. The figure is shown with
     ``plt.show()``; nothing is returned.
     """
     table = pd.read_csv(self.train_fpi, sep='\t', index_col=0)
     negatives = table[table['y'] == 0]

     deltas = []
     ncprs = []
     fcrs = []  # collected alongside the plotted values; not plotted here
     for seq in negatives['Sequence']:
         motif = motif_seq.LcSeq(seq, self.k, self.lca, 'lca')
         _, outside_seq = motif.seq_in_motif()
         inside_kmers, outside_kmers = motif.overlapping_kmer_in_motif()
         if len(inside_kmers) <= 20:
             continue

         charge = kappa.KappaKmers(outside_kmers, outside_seq)
         # Written as "not >" so a NaN FCR is skipped, as a plain ">" test would.
         if not charge.FCR() > 0.1:
             continue

         delta = charge.deltaForm()
         ncprs.append(charge.NCPR())
         print(outside_seq)
         print(delta)
         deltas.append(delta)
         fcrs.append(charge.FCR())

     plt.scatter(ncprs, deltas, alpha=0.5, color='grey')
     # Earlier, tighter axis limits were y in [0, 0.35] and x in [0, 0.4].
     plt.ylim((0, 0.5))
     plt.xlim((-0.8, 0.8))
     plt.xlabel('Net charge per residue', size=14)
     plt.ylabel('Charge Asymmetry (Delta)', size=14)
     plt.title('Outside LC Motifs')
     plt.show()
Пример #3
0
 def normal_charge_properties(self):
     """Print the amino-acid composition of in-motif sequence for filtered proteins.

     Loads the tab-separated table at ``self.train_fpi`` and keeps the rows
     with ``y == 0``. The in-motif part of a sequence (first element of
     ``LcSeq.seq_in_motif()``) is kept only when all of these hold, checked
     in this order:

     * ``overlapping_kmer_in_motif()`` returns more than 20 entries in its
       first element;
     * the ``KappaKmers`` object built from the outside k-mers and outside
       sequence has ``-0.1 < NCPR() < 0.1``;
     * its ``deltaForm()`` is below 0.1;
     * ``NormScore().lc_norm_score([seq])[0]`` is above 20;
     * its ``FCR()`` is below 0.2.

     All kept parts are concatenated and the result of
     ``ProteinAnalysis(...).get_amino_acids_percent()`` is printed. If
     nothing passes the filters, a short notice is printed instead.
     Nothing is returned.
     """
     df = pd.read_csv(self.train_fpi, sep='\t', index_col=0)
     negatives = df[df['y'] == 0]

     in_motif_parts = []
     for seq in negatives['Sequence']:
         ms = motif_seq.LcSeq(seq, self.k, self.lca, 'lca')
         in_seq, out_seq = ms.seq_in_motif()
         in_kmer, out_kmer = ms.overlapping_kmer_in_motif()
         if len(in_kmer) <= 20:
             continue

         ka = kappa.KappaKmers(out_kmer, out_seq)
         delta = ka.deltaForm()
         ncpr = ka.NCPR()
         # "not <" forms keep NaN values rejected, as the plain comparisons did.
         if not -0.1 < ncpr < 0.1:
             continue
         if not delta < 0.1:
             continue

         # NOTE(review): a NormScore is created per candidate sequence; if it
         # is stateless it could be built once before the loop -- confirm.
         score = norm_score.NormScore().lc_norm_score([seq])[0]
         if not score > 20:
             continue

         if ka.FCR() < 0.2:
             in_motif_parts.append(in_seq)

     all_seq_in = ''.join(in_motif_parts)
     if not all_seq_in:
         # Presumably ProteinAnalysis is Biopython's, whose percentage
         # calculation divides by the sequence length; avoid calling it on
         # an empty string.
         print('No sequence passed the charge filters; nothing to analyse.')
         return

     aa_perc = ProteinAnalysis(all_seq_in).get_amino_acids_percent()
     print(aa_perc)
Пример #4
0
 def in_out_kappa(self):
     """Print kappa for the inside- and outside-motif parts of each sequence.

     Loads the tab-separated table at ``self.train_fpi``, keeps the rows
     with ``y == 0`` and, for every value in the ``Sequence`` column,
     splits it with ``LcSeq.seq_in_motif()`` (built from ``self.k`` and
     ``self.lca``). ``SequenceParameters(...).get_kappa()`` is printed for
     the first part, then for the second, followed by an empty line.
     Nothing is returned.
     """
     table = pd.read_csv(self.train_fpi, sep='\t', index_col=0)
     negatives = table[table['y'] == 0]
     for seq in negatives['Sequence']:
         motif = motif_seq.LcSeq(seq, self.k, self.lca, 'lca')
         inside, outside = motif.seq_in_motif()
         # Inside-motif part first, then the outside part.
         for region in (inside, outside):
             print(SequenceParameters(region).get_kappa())
         print('')