Exemplo n.º 1
0
# <codecell>

# Drop missing entries from both the 'Int' sequence column and the
# 'PSSMScore' column, then inner-align them so that only index labels
# present in both survive. The two results share one index.
seq_df, known_subs = (
    aligned_seqs['Int']
    .dropna()
    .align(nlanl_data['PSSMScore'].dropna(), join='inner')
)

# <codecell>

from SeqSklearn import BinBasedCluster

# Cluster helper configured with the PSSM score bins (pssm_bins is defined
# elsewhere in the notebook).
clust = BinBasedCluster(bins=pssm_bins)

# Select a 34-column window of each aligned sequence: 0-based positions
# 92..125, i.e. 17 columns before index 109 and 17 from 109 onwards.
getter = itemgetter(*range(109-17, 109+17))
seq_list_seqs = [list(l) for l in seq_df.values]

# Materialise the rows explicitly. On Python 3 map() returns a lazy
# iterator, and np.array(map(...)) would wrap that iterator in a 0-d object
# array, so seq_region.shape[0] below would raise IndexError. The list
# comprehension yields the same 2-D array on Python 2 and Python 3.
seq_region = np.array([getter(seq) for seq in seq_list_seqs])

# Fit with an all-ones placeholder target (one row per sequence), then
# encode the region. NOTE(review): region_transform is created outside this
# cell; what it does with the target is not visible here.
region_transform.fit(seq_region, np.ones((seq_region.shape[0], 1)))
seq_data = region_transform.transform(seq_region)

# Project the encoded sequences and compute the grid/decision surface used
# for plotting (presumably a PCA projection, judging by the names -- confirm
# against SeqSklearn.BinBasedCluster.make_vern_points).
pca_trans, biny, xx, yy, Z = clust.make_vern_points(seq_data, known_subs.values)

# <codecell>

from pylab import get_cmap
# Scatter plot of the projected sequences, coloured by their PSSM score.
plt.figure(figsize=(10, 10))

# Add a little Gaussian noise so coincident points do not hide each other.
jitter = 0.01*np.random.randn(*pca_trans.shape) + pca_trans

# vmax=0 caps the colour scale at 0; larger scores share the top colour.
plt.scatter(jitter[:, 0], jitter[:, 1], vmax=0, c=known_subs,
            cmap=get_cmap('copper_r'), alpha=0.5)
cbar = plt.colorbar()
cbar.set_label('PSSMScore')

# Column 0 is drawn on the x-axis and column 1 on the y-axis, so the first
# component labels x and the second labels y (the labels were swapped).
plt.xlabel('PC-1')
plt.ylabel('PC-2')

# <codecell>

from sklearn.cross_validation import permutation_test_score, Bootstrap
Exemplo n.º 2
0
# Standard deviation of 'score' for each value of 'n_clusters'; used as the
# error-bar half-height. `t` (the plotted central values, indexed the same
# way) is computed earlier in this cell, outside the visible excerpt.
e = trop_data.groupby('n_clusters')['score'].std()
plt.errorbar(t.index, t.values, yerr=e.values)
# Title typo fixed ('sequnces' -> 'sequences'); it is written into the
# saved figure below.
plt.title('Clustering of North American V3 sequences')
plt.xlabel('Cluster Size')
plt.xlim([1.5, 60])
plt.ylim([0, 1])
plt.ylabel('Silhouette Score')
plt.savefig('final_figures/long_NA_v3_clustering.png', dpi=1000)

# <codecell>

from SeqSklearn import BinBasedCluster

# Build the bin-based clusterer from the PSSM score bins, then project the
# NA_blood_df feature matrix against the matching PSSM scores. The call
# returns the projected points plus the grid/surface arrays used for
# plotting (see SeqSklearn.BinBasedCluster.make_vern_points for details).
bin_clust = BinBasedCluster(bins=pssm_bins)

pca_trans, biny, xx, yy, Z = bin_clust.make_vern_points(
    NA_blood_df.values,
    NA_wanted_lanl['PSSMScore'],
)


# <codecell>

from pylab import get_cmap
# Scatter plot of the projected sequences, coloured by their PSSM score.
plt.figure(figsize=(10, 10))

# Add Gaussian noise so coincident points do not hide each other.
jitter = 0.1*np.random.randn(*pca_trans.shape) + pca_trans

# vmax=0 caps the colour scale at 0; larger scores share the top colour.
plt.scatter(jitter[:, 0], jitter[:, 1], vmax=0, c=NA_wanted_lanl['PSSMScore'],
            cmap=get_cmap('copper_r'), alpha=0.5)
cbar = plt.colorbar()
cbar.set_label('PSSMScore')

# Column 0 is drawn on the x-axis and column 1 on the y-axis, so the first
# component labels x and the second labels y (the labels were swapped).
plt.xlabel('PC-1')
plt.ylabel('PC-2')

# <codecell>