def comp(self):
    """Tally motif k-mers that occur in selected secondary-structure contexts.

    Reads the tab-separated table at ``self.an_fpi`` (first column used as
    the index).  For every row, the k-mers of the 'Sequence' column are
    walked in lockstep with the k-mers of the structure string returned by
    ``self.add_x`` for the 'Secondary Structure' and 'Missing' columns.  A
    sequence k-mer is counted when ``tools_lc.lca_motif`` or
    ``tools_lc.lce_motif`` accepts it and the paired structure k-mer
    consists only of the symbols 'S', 'T', 'P' and 'X'.

    Side effects: prints each row index as it is processed, then prints
    every k-mer counted more than 200 times followed by its count.
    Returns ``None``.
    """
    allowed_ss = {'S', 'T', 'P', 'X'}
    table = pd.read_csv(self.an_fpi, sep='\t', index_col=0)
    tally = {}
    for idx, record in table.iterrows():
        print(idx)
        sequence = record['Sequence']
        structure = record['Secondary Structure']
        missing = record['Missing']
        # NOTE(review): add_x presumably marks missing positions in the
        # structure string with 'X' -- confirm against its definition.
        masked = self.add_x(structure, missing)
        pairs = zip(
            tools_lc.seq_to_kmers(sequence, self.k),
            tools_lc.seq_to_kmers(masked, self.k),
        )
        for seq_kmer, ss_kmer in pairs:
            is_motif = (tools_lc.lca_motif(seq_kmer, self.lca)
                        or tools_lc.lce_motif(seq_kmer, self.lce))
            if not is_motif:
                continue
            if not (set(ss_kmer) <= allowed_ss):
                continue
            tally[seq_kmer] = tally.get(seq_kmer, 0) + 1
    # Report only the frequently observed k-mers, in first-seen order.
    for kmer, n_seen in tally.items():
        if n_seen > 200:
            print(kmer)
            print(n_seen)
def seq_lca2(self, seqs):
    """Concatenate every k-mer accepted by ``tools_lc.lca_motif``.

    Args:
        seqs: Iterable of sequences; each is split with
            ``tools_lc.seq_to_kmers(seq, self.k)``.

    Returns:
        A single string made of all k-mers, across all sequences and in
        encounter order, for which ``tools_lc.lca_motif(kmer, self.lca)``
        is truthy.  Empty string when nothing matches.
    """
    # Join once instead of growing a string with ``+=`` inside the loops,
    # which can degrade to quadratic time on long inputs.
    return ''.join(
        kmer
        for seq in seqs
        for kmer in tools_lc.seq_to_kmers(seq, self.k)
        if tools_lc.lca_motif(kmer, self.lca)
    )
def count_lca_and_lce(self, seqs):
    """Count, per sequence, the k-mers accepted by both motif tests.

    Args:
        seqs: Iterable of sequences; each is split with
            ``tools_lc.seq_to_kmers(seq, self.k)``.

    Returns:
        A list with one integer per input sequence: the number of its
        k-mers for which ``tools_lc.lce_motif(kmer, self.lce)`` and
        ``tools_lc.lca_motif(kmer, self.lca)`` are both truthy.
    """
    totals = []
    for sequence in seqs:
        # The LCE test runs first; the LCA test is only evaluated for
        # k-mers that already passed it.
        hits = sum(
            1
            for kmer in tools_lc.seq_to_kmers(sequence, self.k)
            if tools_lc.lce_motif(kmer, self.lce)
            and tools_lc.lca_motif(kmer, self.lca)
        )
        totals.append(hits)
    return totals
def count_lca_charge(self, seqs):
    """Count, per sequence, LCA-only k-mers containing 'K', 'R' and 'E'.

    A k-mer is counted when ``tools_lc.lca_motif(kmer, self.lca)`` is
    truthy, ``tools_lc.lce_motif(kmer, self.lce)`` is falsy, and the k-mer
    contains all three of the characters 'K', 'R' and 'E'.

    Args:
        seqs: Iterable of sequences; each is split with
            ``tools_lc.seq_to_kmers(seq, self.k)``.

    Returns:
        A list with one integer count per input sequence.
    """
    required = ('K', 'R', 'E')
    per_seq = []
    for sequence in seqs:
        matched = 0
        for kmer in tools_lc.seq_to_kmers(sequence, self.k):
            if not tools_lc.lca_motif(kmer, self.lca):
                continue
            if tools_lc.lce_motif(kmer, self.lce):
                continue
            # All three residues must be present, not just one of them.
            if all(residue in kmer for residue in required):
                matched += 1
        per_seq.append(matched)
    return per_seq
def seq_lca(self, seqs):
    """Collect the LCA-motif k-mers of each sequence and count them.

    Args:
        seqs: Iterable of sequences; each is split with
            ``tools_lc.seq_to_kmers(seq, self.k)``.

    Returns:
        A tuple ``(lca_counts, seq_kmers)`` of two parallel lists with one
        entry per input sequence: the number of k-mers for which
        ``tools_lc.lca_motif(kmer, self.lca)`` is truthy, and those k-mers
        concatenated in encounter order into one string.
    """
    lca_counts = []
    seq_kmers = []
    for seq in seqs:
        matches = [
            kmer
            for kmer in tools_lc.seq_to_kmers(seq, self.k)
            if tools_lc.lca_motif(kmer, self.lca)
        ]
        lca_counts.append(len(matches))
        # Join once rather than growing the string with ``+=`` per k-mer.
        seq_kmers.append(''.join(matches))
    return lca_counts, seq_kmers