def plot_clusters(coords, clusters, s=1):
    """Scatter-plot the first two columns of `coords`, coloured by cluster.

    Args:
        coords: Array whose rows are points; columns 0 and 1 are plotted.
        clusters: Per-row cluster labels. They are used directly as indices
            into the colour array, so they must be valid integer indices.
        s: Marker size forwarded to `plt.scatter`.

    Raises:
        AssertionError: If `coords` and `clusters` differ in their first
            dimension (a message is written to stderr first).
    """
    n_cells = coords.shape[0]
    n_labels = clusters.shape[0]
    if n_cells != n_labels:
        sys.stderr.write(
            'Mismatch: {} cells, {} labels\n'.format(n_cells, n_labels)
        )
    assert n_cells == n_labels

    palette = [
        '#377eb8', '#ff7f00', '#4daf4a', '#f781bf', '#a65628',
        '#984ea3', '#999999', '#e41a1c', '#dede00', '#ffe119',
        '#e6194b', '#ffbea3', '#911eb4', '#46f0f0', '#f032e6',
        '#d2f53c', '#008080', '#e6beff', '#aa6e28', '#800000',
        '#aaffc3', '#808000', '#ffd8b1', '#000080', '#808080',
        '#fabebe', '#a3f4ff',
    ]
    # One colour per label value 0..max(clusters); the palette repeats when
    # there are more labels than palette entries.
    n_colors = int(max(clusters) + 1)
    colors = np.array(list(islice(cycle(palette), n_colors)))

    plt.figure()
    plt.scatter(coords[:, 0], coords[:, 1], c=colors[clusters], s=s)
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    """Save a scatter of predicted score vs. variance, coloured by acquisition.

    Predicted scores are capped at 10000 for display. The points are coloured
    by `-acquisition` with the 'hot' colormap and the figure is written to
    'figures/acquisition_unknown_<regress_type>.png'.

    Args:
        y_unk_pred: Predicted scores (x axis). Not modified.
        var_unk_pred: Predicted variances (y axis).
        acquisition: Acquisition values; negated to obtain the colours.
        regress_type: Model name used in the title and output filename.
    """
    # `y_unk_pred[:]` on a NumPy array is only a view, so capping through it
    # would overwrite the caller's predictions. np.minimum allocates a new
    # array and leaves the input untouched.
    y_capped = np.minimum(y_unk_pred, 10000)

    plt.figure()
    plt.scatter(y_capped, var_unk_pred, alpha=0.5, c=-acquisition,
                cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
def score_scatter(y_pred, y, var_pred, regress_type, prefix=''):
    """Save a scatter of predicted score vs. variance, coloured by true score.

    Predictions are clamped to [0, 10000] for display. Colours are `y`
    min-max scaled to [0, 1] and shown with the viridis colormap. The figure
    is written to
    'figures/variance_vs_pred_<prefix>regressors<regress_type>.png'.

    Args:
        y_pred: Predicted scores (x axis). Not modified.
        y: Reference scores used for colouring; must support `.min()` and
            `.max()`.
        var_pred: Predicted variances (y axis).
        regress_type: Model name used in the output filename.
        prefix: Optional string inserted into the output filename.
    """
    # `y_pred[:]` on a NumPy array is only a view, so clamping through it
    # would overwrite the caller's predictions. np.clip returns a new array.
    y_clamped = np.clip(y_pred, 0, 10000)

    # NOTE(review): if every value in `y` is identical the denominator is
    # zero and the colours become NaN.
    y_scaled = (y - y.min()) / (y.max() - y.min())

    plt.figure()
    plt.scatter(y_clamped, var_pred, alpha=0.3, c=y_scaled)
    plt.viridis()
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/variance_vs_pred_{}regressors{}.png'
                .format(prefix, regress_type), dpi=300)
    plt.close()
def plot_mapping(curr_ds, curr_ref, ds_ind, ref_ind):
    """Show t-SNE embeddings of two datasets with lines joining matched rows.

    Each dataset is embedded separately with the same TSNE object. The first
    embedding is shifted by +100 along the second axis (presumably to keep
    the two point clouds apart) and a line is drawn between row `ds_ind[k]`
    of the first embedding and row `ref_ind[k]` of the second for every k.

    Args:
        curr_ds: Data passed to `TSNE.fit` for the first embedding.
        curr_ref: Data passed to `TSNE.fit` for the second embedding.
        ds_ind: Row indices into the first embedding.
        ref_ind: Row indices into the second embedding, paired with `ds_ind`.
    """
    embedder = TSNE(n_iter=400, verbose=VERBOSE, random_state=69)

    embedder.fit(curr_ds)
    plt.figure()
    ds_xy = embedder.embedding_[:, :]
    ds_xy[:, 1] += 100
    plt.scatter(ds_xy[:, 0], ds_xy[:, 1])

    embedder.fit(curr_ref)
    ref_xy = embedder.embedding_[:, :]
    plt.scatter(ref_xy[:, 0], ref_xy[:, 1])

    # Build one long polyline; the None after each pair separates it from
    # the next pair so that only matched points are joined.
    xs, ys = [], []
    for ds_row, ref_row in zip(ds_ind, ref_ind):
        xs.extend((ds_xy[ds_row, 0], ref_xy[ref_row, 0], None))
        ys.extend((ds_xy[ds_row, 1], ref_xy[ref_row, 1], None))

    plt.plot(xs, ys, 'b-', alpha=0.3)
    plt.show()
def latent_scatter(var_unk_pred, y_unk_pred, acquisition, **kwargs):
    """Plot observed vs. unknown compounds in PCA, UMAP and t-SNE embeddings.

    Compound features for the observed and unknown index sets are stacked
    and embedded three ways. For each embedding three figures are saved under
    'figures/':

    * 'latent_scatter_<name>_ypred_...': unknown points in blue, observed
      points as orange crosses.
    * 'latent_scatter_<name>_var_...': unknown points coloured by the rank of
      `var_unk_pred`.
    * 'latent_scatter_<name>_acq_...': unknown points coloured by
      `-acquisition`.

    The Spearman and Pearson correlations between each unknown compound's
    distance to its nearest observed compound and `var_unk_pred` are printed.

    Args:
        var_unk_pred: Predicted variances for the unknown compounds.
        y_unk_pred: Accepted for interface compatibility; not used.
        acquisition: Acquisition values for the unknown compounds.
        **kwargs: Must contain 'chems', 'chem2feature', 'idx_obs', 'idx_unk',
            'regress_type' and 'prot_target' ('prot_target' may be None).

    Raises:
        KeyError: If any of the required keyword arguments is missing.
    """
    # Imported inside the function, as in the original, so that importing
    # this module does not require these packages.
    from fbpca import pca
    from MulticoreTSNE import MulticoreTSNE as TSNE
    from sklearn.neighbors import NearestNeighbors
    from umap import UMAP

    chems = kwargs['chems']
    chem2feature = kwargs['chem2feature']
    idx_obs = kwargs['idx_obs']
    idx_unk = kwargs['idx_unk']
    regress_type = kwargs['regress_type']
    prot_target = kwargs['prot_target']

    # The index collections hold pairs; only the first element (the compound
    # index) is used here, de-duplicated and sorted.
    chem_idx_obs = sorted({i for i, _ in idx_obs})
    chem_idx_unk = sorted({i for i, _ in idx_unk})

    feature_obs = np.array([chem2feature[chems[i]] for i in chem_idx_obs])
    feature_unk = np.array([chem2feature[chems[i]] for i in chem_idx_unk])

    # NOTE(review): the correlations and colourings below assume
    # `var_unk_pred` and `acquisition` have one entry per unique unknown
    # compound, in sorted compound-index order -- confirm against callers.
    nbrs = NearestNeighbors(n_neighbors=1).fit(feature_obs)
    dist = np.ravel(nbrs.kneighbors(feature_unk)[0])
    print('Distance Spearman r = {}, P = {}'.format(
        *ss.spearmanr(dist, var_unk_pred)))
    print('Distance Pearson rho = {}, P = {}'.format(
        *ss.pearsonr(dist, var_unk_pred)))

    # Observed rows come first, so the first `n_obs` rows of every embedding
    # are the observed compounds and the remainder are the unknown ones.
    X = np.vstack([feature_obs, feature_unk])
    n_obs = len(chem_idx_obs)

    U, s, _ = pca(X, k=3)
    X_pca = U * s

    X_umap = UMAP(
        n_neighbors=15,
        min_dist=0.5,
        n_components=2,
        metric='euclidean',
    ).fit_transform(X)

    X_tsne = TSNE(n_components=2, n_jobs=20).fit_transform(X)

    suffix = '' if prot_target is None else '_' + prot_target

    # Colourings are the same for every embedding, so compute them once.
    colourings = (
        ('var', ss.rankdata(var_unk_pred), 'coolwarm'),
        ('acq', -acquisition, 'hot'),
    )

    embeddings = (('pca', X_pca), ('umap', X_umap), ('tsne', X_tsne))
    for name, coords in embeddings:
        obs_xy = coords[:n_obs]
        unk_xy = coords[n_obs:]

        plt.figure()
        sns.scatterplot(
            x=unk_xy[:, 0], y=unk_xy[:, 1],
            color='blue', alpha=0.1,
        )
        plt.scatter(
            x=obs_xy[:, 0], y=obs_xy[:, 1],
            color='orange', alpha=1.0, marker='x', linewidths=10,
        )
        plt.savefig('figures/latent_scatter_{}_ypred_{}{}.png'.format(
            name, regress_type, suffix), dpi=300)
        plt.close()

        for tag, values, cmap in colourings:
            plt.figure()
            plt.scatter(x=unk_xy[:, 0], y=unk_xy[:, 1],
                        c=values, alpha=0.1, cmap=cmap)
            plt.savefig('figures/latent_scatter_{}_{}_{}{}.png'.format(
                name, tag, regress_type, suffix), dpi=300)
            plt.close()
def _read_acquisition_log(log_fname):
    """Collect acquired Kd values and (chem, prot) index pairs per iteration.

    Returns two dicts keyed by iteration number: one mapping to the list of
    Kd values and one mapping to the list of `(chem_idx, prot_idx)` tuples.
    Both dicts always have the same keys.
    """
    iter_to_Kds = {}
    iter_to_idxs = {}
    iteration = 0

    with open(log_fname) as f:
        for line in f:
            # Only records whose line starts with '2019' or '2020' and that
            # contain the ' | ' separator are considered.
            # NOTE(review): lines from any other year are skipped.
            if not line.startswith(('2019', '2020')) or ' | ' not in line:
                continue

            message = line.split(' | ')[1]

            if message.startswith('Iteration'):
                iteration = int(message.strip().split()[-1])
                iter_to_Kds.setdefault(iteration, [])
                iter_to_idxs.setdefault(iteration, [])

            elif message.startswith('\tAcquire '):
                fields = message.strip().split()
                Kd = float(fields[-1])
                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))
                # setdefault: an acquisition seen before any 'Iteration'
                # header is filed under the initial iteration 0 instead of
                # raising KeyError.
                iter_to_Kds.setdefault(iteration, []).append(Kd)
                iter_to_idxs.setdefault(iteration, []).append(
                    (chem_idx, prot_idx))

    return iter_to_Kds, iter_to_idxs


def parse_log(regress_type, experiment, **kwargs):
    """Parse an acquisition log and plot the acquired Kd per iteration.

    Reads 'iterate_davis2011kinase_<regress_type>_<experiment>.log' from the
    current directory, prints the average acquired Kd, and saves the
    mean/min/max Kd per iteration to
    'figures/Kd_over_iterations_<regress_type>_<experiment>.png'.

    For the 'perprot' experiment, summarising stops after the first
    iteration greater than 4.

    Args:
        regress_type: Model name used in the log and figure filenames.
        experiment: Experiment name used in the log and figure filenames.
        **kwargs: Optional. If `plot_entropy` is truthy, the differential
            entropy of a Gaussian fitted to the cumulatively acquired
            features is also plotted; this additionally requires 'chems',
            'prots', 'chem2feature' and 'prot2feature'. By default the
            entropy plot is skipped.

    Raises:
        FileNotFoundError: If the log file does not exist.
    """
    log_fname = 'iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment)

    iter_to_Kds, iter_to_idxs = _read_acquisition_log(log_fname)
    iterations = sorted(iter_to_Kds)

    # Plot Kd over iterations.
    # `plotted_iterations` tracks exactly the iterations that contributed a
    # summary value, so x and y stay the same length even when the loop
    # stops early or an iteration has no acquisitions.
    plotted_iterations = []
    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kds = iter_to_Kds[iteration]
        if Kds:
            plotted_iterations.append(iteration)
            Kd_iter.append(np.mean(Kds))
            Kd_iter_max.append(max(Kds))
            Kd_iter_min.append(min(Kds))
            all_Kds.extend(Kds)
            if iteration == 0:
                print('First average Kd is {}'.format(Kd_iter[0]))
        if iteration > 4 and experiment == 'perprot':
            break

    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(plotted_iterations, Kd_iter)
    plt.plot(plotted_iterations, Kd_iter)
    plt.fill_between(plotted_iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # The entropy plot used to sit behind an unconditional `return`; it is
    # now opt-in so the default behaviour is unchanged.
    if not kwargs.get('plot_entropy', False):
        return

    # Plot differential entropy of acquired samples over iterations.
    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            # NOTE(review): `+` concatenates if the features are lists but
            # adds element-wise if they are arrays -- confirm which is
            # intended.
            X_acquired.append(
                chem2feature[chems[i]] + prot2feature[prots[j]])
        if len(X_acquired) <= 1:
            d_entropies.append(float('nan'))
        else:
            mixture = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(mixture.means_[0],
                                           mixture.covariances_[0])
            d_entropies.append(gaussian.entropy())

    if d_entropies:
        print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()