# coding: utf-8
"""Featurise anti-microbial peptides and plot a 2-D t-SNE embedding.

Steps, in order:

1. Parse every record from the FASTA file at ``FASTA_PATH``.
2. Run ``protein_features`` on each record and collect the results in a
   DataFrame indexed by record id.
3. Write that DataFrame to ``FEATURES_CSV_PATH``.
4. Embed the feature matrix with t-SNE and show a scatter plot of the
   two embedding coordinates.

This file was reconstructed from an interactive session log; the failed
attempts and bare display expressions of that session were dropped and
only the statements that produce the final result were kept.
"""
from Bio import SeqIO
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns  # noqa: F401 -- imported in the original session, never referenced by name
from sklearn.manifold import TSNE

from protein_sequence_features import protein_features

FASTA_PATH = 'Data/anti_microbial_peptide.fasta'
FEATURES_CSV_PATH = 'Data/anti_microbial_peptide_features.csv'

# Materialise the parser output as a list: the records are walked twice
# below (once for the features, once for the ids).
peptides = list(SeqIO.parse(FASTA_PATH, 'fasta'))

# A list (not a lazy ``map``) so the result can be indexed and re-read on
# Python 3 as well as Python 2.
# NOTE(review): assumes protein_features returns one row-like object
# (dict or sequence) per record that pandas can stack -- confirm.
features = [protein_features(record) for record in peptides]

peptides_dataset = pd.DataFrame(
    features,
    index=[record.id for record in peptides],
)
peptides_dataset.to_csv(FEATURES_CSV_PATH)

# fit_transform both fits the model and returns the embedding, so a
# separate fit() beforehand would only repeat the optimisation.
tsne = TSNE()
X = tsne.fit_transform(peptides_dataset.values)

# One point per peptide: first embedding coordinate against the second.
plt.scatter(X[:, 0], X[:, 1])
plt.show()