def visualize(assembled, labels, namespace, data_names,
              gene_names=None, gene_expr=None, genes=None,
              n_iter=N_ITER, perplexity=PERPLEXITY, verbose=VERBOSE,
              learn_rate=200., early_exag=12., embedding=None,
              size=1):
    # Fit t-SNE.
    if embedding is None:
        tsne = TSNEApprox(n_iter=n_iter, perplexity=perplexity,
                          verbose=verbose, random_state=69,
                          learning_rate=learn_rate,
                          early_exaggeration=early_exag)
        tsne.fit(np.concatenate(assembled))
        embedding = tsne.embedding_

    # Shuffle the plotting order so no single dataset is drawn on top.
    rand_idx = list(range(embedding.shape[0]))
    random.shuffle(rand_idx)
    embedding = embedding[rand_idx, :]
    labels = labels[rand_idx]

    # Plot clusters together.
    plot_clusters(embedding, labels, s=size)
    plt.title(('Panorama ({} iter, perplexity: {}, sigma: {}, '
               'knn: {}, hvg: {}, dimred: {}, approx: {})').format(
                   n_iter, perplexity, SIGMA, KNN, HVG, DIMRED, APPROX))
    plt.savefig(namespace + '.svg', dpi=500)

    # Plot clusters individually.
    for i in range(len(data_names)):
        visualize_cluster(embedding, i, labels,
                          cluster_name=data_names[i], size=size,
                          viz_prefix=namespace)

    # Plot gene expression levels.
    if (gene_names is not None and
            gene_expr is not None and
            genes is not None):
        gene_expr = gene_expr[rand_idx, :]
        for gene_name in gene_names:
            visualize_expr(gene_expr, embedding, genes, gene_name,
                           size=size, viz_prefix=namespace)

    return embedding
def plot_batch(df, batch):
    # Plot 50uM.
    df_50uM = df[df.conc == -3]

    if batch.startswith('Ala'):
        df_dmso = df_50uM[df_50uM.comp == 'DMSO']
        for comp in [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'IKK16' ]:
            df_comp = df_50uM[df_50uM.comp == comp]
            t, p_2side = ss.ttest_ind(df_comp.fluo, df_dmso.fluo)
            p_1side = p_2side / 2. if t < 0 else 1. - (p_2side / 2.)
            print('{}, one-sided t-test P = {}, n = {}'
                  .format(comp, p_1side, len(df_comp)))

    if batch == 'AlaA':
        order = [ 'K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO' ]
    elif batch == 'AlaB':
        order = [ 'IKK16', 'K252a', 'RIF', 'DMSO' ]
    else:
        return

    plt.figure()
    sns.barplot(x='comp', y='fluo', data=df_50uM, ci=95, dodge=False,
                hue='control', palette=sns.color_palette("RdBu_r", 7),
                order=order, capsize=0.2, errcolor='#888888',)
    sns.swarmplot(x='comp', y='fluo', data=df_50uM, color='black',
                  order=order)
    #plt.ylim([ 10, 300000 ])
    if not batch.startswith('Ala'):
        plt.yscale('log')
    plt.savefig('figures/tb_culture_50uM_{}.svg'.format(batch))
    plt.close()

    # Plot dose-response.
    comps = sorted(set(df.comp))
    concentrations = sorted(set(df.conc))

    plt.figure(figsize=(24, 6))
    for cidx, comp in enumerate(order):
        df_subset = df[df.comp == comp]
        plt.subplot(1, 5, cidx + 1)
        sns.lineplot(x='conc', y='fluo', data=df_subset, ci=95,)
        sns.scatterplot(x='conc', y='fluo', data=df_subset, color='black',)
        plt.title(comp)
        if batch.startswith('Ala'):
            plt.ylim([ 0., 1.3 ])
        else:
            plt.ylim([ 10, 1000000 ])
            plt.yscale('log')
        plt.xticks(list(range(-3, -6, -1)),
                   [ '50', '25', '10', ])#'1', '0.1' ])
    plt.savefig('figures/tb_culture_{}.svg'.format(batch))
    plt.close()
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    # Copy before clipping so the caller's predictions are not modified in place.
    y_unk_pred = np.copy(y_unk_pred)
    y_unk_pred[y_unk_pred > 10000] = 10000
    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5,
                c=-acquisition, cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
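# Hypothetical usage sketch for acquisition_scatter (the arrays, the beta weight, and the
# 'hybrid' label below are made up for illustration and do not come from the source). It
# shows one plausible way an acquisition score could combine predicted Kd with predictive
# variance, e.g. an upper-confidence-bound-style trade-off.
import numpy as np

n_candidates = 1000
y_unk_pred = np.random.uniform(0., 20000., n_candidates)  # predicted Kd values (assumed)
var_unk_pred = np.random.uniform(0., 5., n_candidates)    # predictive variances (assumed)
beta = 1.                                                  # exploration weight (assumed)
# Reward low predicted Kd (tight binding) and high model uncertainty.
acquisition = -y_unk_pred + beta * var_unk_pred
acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, 'hybrid')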
def expo(args):
    def filename_fn(args):
        rs = 'N({}, {})'.format(args.radius, args.sigma)
        return rs

    def fpath(fname):
        _fpath = os.path.join(args.output_dir, fname)
        return _fpath

    length = 5 * args.radius
    linspace, data = SyntheticDataset.grid_data(args.num_points, length=length)

    # loader = dataset[args.dataset](args)
    # trainData = loader.train
    # for batch_idx, samples in enumerate(trainData):
    #     data, labels = samples[DatasetType.InD]

    plt.xlim(-1 * length, length)
    plt.ylim(-1 * length, length)

    for scale in tqdm([1, 2, 3, 4]):
        sigma = scale * args.sigma
        scale_args = deepcopy(args)
        scale_args.sigma = sigma
        fname = filename_fn(scale_args)

        checkpoint_dir = os.path.join(args.work_dir, 'checkpoints')
        saver = Saver(checkpoint_dir)  # makes the directory if it is not already present

        # hash_args(scale_args) generates the hex string that keys the checkpoint.
        payload = saver.load(hash_args(scale_args))

        def run_and_save(scale_args):
            # main() is expected to build the model for this configuration;
            # checkpoint it so the run can be reused.
            export = main(scale_args)
            payload = export['model']
            saver.save(hash_args(scale_args), payload)
            return payload

        # Reuse the checkpointed model if one exists; otherwise run and save it.
        export = payload or run_and_save(scale_args)

        with torch.no_grad():
            scores = inference(export, data)

        np_x = data.cpu().numpy()
        for key in scores:
            score = scores[key].cpu().numpy()
            plot_pcolormesh(np_x, linspace, score)
            score_fname = '{}_{}'.format(fname, key)
            plt.title(score_fname)
            flush_plot(plt, fpath(score_fname) + '.png')
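# hash_args() is called above but not defined in this section. A minimal sketch of what it
# could look like, assuming it only needs to map an argparse-style Namespace to a stable,
# filename-safe checkpoint key; the actual helper may differ.
import hashlib
import json

def hash_args(args):
    # Serialize the argument namespace deterministically, then hash it so that
    # identical configurations resolve to the same checkpoint entry.
    blob = json.dumps(vars(args), sort_keys=True, default=str)
    return hashlib.md5(blob.encode('utf-8')).hexdigest()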
y_true = masks.reshape(
    masks.shape[0] * masks.shape[1] * masks.shape[2] * masks.shape[3], 1)
y_scores = np.where(y_scores > 0.5, 1, 0)
y_true = np.where(y_true > 0.5, 1, 0)

import os
os.makedirs('./output', exist_ok=True)
output_folder = 'output/'

# Area under the ROC curve.
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
print("\nArea under the ROC curve: " + str(AUC_ROC))
roc_fig = plt.figure()
plt.plot(fpr, tpr, '-',
         label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(output_folder + "ROC.png")

# Precision-recall curve.
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
precision = np.fliplr([precision])[0]
recall = np.fliplr([recall])[0]
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_curve = plt.figure()
plt.plot(recall, precision, '-',
order_list = [ order for order, _ in
               sorted(order_list, key=lambda x: x[1]) ]

plt.subplot(1, 3, bidx + 1)
sns.barplot(
    x='order', y='Kdpoint', data=df_subset, color=palette[bidx],
    order=order_list, ci=95, capsize=0.4, errcolor='#888888',
)
sns.swarmplot(
    x='order', y='Kdpoint', data=df_subset, color='black',
    order=order_list,
)
plt.ylim([ -100, 10100 ])
plt.title('{} {}'.format(kinase, model))

plt.savefig('figures/prediction_barplot_{}_{}.svg'.format(
    kinase, model))
plt.close()
datasets, genes_list, n_cells = load_names(data_names)
#datasets, genes = merge_datasets(datasets, genes_list)
#datasets_dimred, genes = process_data(datasets, genes, hvg=hvg)
datasets, genes = correct(datasets, genes_list)
X = np.concatenate(datasets)
X[X < 0] = 0

cell_labels = (
    open('data/cell_labels/pancreas_cluster.txt')
    .read().rstrip().split()
)
er_idx = [ i for i, cl in enumerate(cell_labels) if cl == 'beta_er' ]
beta_idx = [ i for i, cl in enumerate(cell_labels) if cl == 'beta' ]

gadd_idx = list(genes).index('GADD45A')
herp_idx = list(genes).index('HERPUD1')

plt.figure()
plt.boxplot([ X[er_idx, gadd_idx], X[beta_idx, gadd_idx] ], showmeans=True)
plt.title('GADD45A (p < {})'.format(
    ttest_ind(X[er_idx, gadd_idx], X[beta_idx, gadd_idx])[1]))
plt.xticks([1, 2], ['beta_er', 'beta'])
plt.ylabel('Scaled gene expression')
plt.savefig('er_stress_GADD45A.svg')

plt.figure()
plt.boxplot([ X[er_idx, herp_idx], X[beta_idx, herp_idx] ], showmeans=True)
plt.title('HERPUD1 (p < {})'.format(
    ttest_ind(X[er_idx, herp_idx], X[beta_idx, herp_idx])[1]))
plt.xticks([1, 2], ['beta_er', 'beta'])
plt.ylabel('Scaled gene expression')
plt.savefig('er_stress_HERPUD1.svg')
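# The __main__ script below calls a sil() helper that is not defined in this section.
# A minimal sketch, assuming it simply wraps scikit-learn's per-sample silhouette
# coefficients on the subsampled embedding; the actual helper may differ (e.g. in metric).
from sklearn.metrics import silhouette_samples

def sil(X, labels):
    # One silhouette coefficient per cell, computed against its dataset-of-origin label.
    return silhouette_samples(X, labels)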
if __name__ == '__main__':
    labels = np.array(
        open('data/cell_labels/all.txt').read().rstrip().split()
    )

    # Scanorama.
    X = np.loadtxt('data/panorama_embedding.txt')
    idx = np.random.choice(X.shape[0], size=20000, replace=False)
    sil_pan = sil(X[idx, :], labels[idx])
    print(np.median(sil_pan))

    # scran MNN.
    X = np.loadtxt('data/mnn_embedding.txt')
    idx = np.random.choice(X.shape[0], size=20000, replace=False)
    sil_mnn = sil(X[idx, :], labels[idx])
    print(np.median(sil_mnn))

    # Seurat CCA.
    X = np.loadtxt('data/cca_embedding.txt')
    idx = np.random.choice(X.shape[0], size=20000, replace=False)
    sil_cca = sil(X[idx, :], labels[idx])
    print(np.median(sil_cca))

    print(ttest_ind(sil_pan, sil_mnn))
    print(ttest_ind(sil_pan, sil_cca))

    plt.figure()
    plt.boxplot([ sil_pan, sil_mnn, sil_cca ], showmeans=True)
    plt.title('Distributions of Silhouette Coefficients')
    plt.xticks([1, 2, 3], ['Scanorama', 'scran MNN', 'Seurat CCA'])
    plt.ylabel('Silhouette Coefficient')
    plt.savefig('silhouette.svg')
# Project to higher dimension.
Z = np.absolute(np.random.randn(2, 100))
datasets = [ np.dot(s, Z) for s in samples ]

# Add batch effect "noise."
datasets = [ ds + np.random.randn(1, 100) for ds in datasets ]

# Normalize datasets.
datasets = [ normalize(ds, axis=1) for ds in datasets ]

tsne = TSNE(n_iter=400, perplexity=100, verbose=2, random_state=69)

tsne.fit(np.concatenate(datasets[1:]))
plot_clusters(tsne.embedding_, np.concatenate(clusters[1:]), s=500)
plt.title('Uncorrected data')
plt.savefig('simulation_uncorrected.svg')

# Assemble (batch-correct) the datasets; they are modified in place, so the
# per-dataset t-SNE plots below show the corrected data.
assembled = assemble(datasets[1:], verbose=1, sigma=1, knn=10, approx=True)

tsne.fit(datasets[1])
plot_clusters(tsne.embedding_, clusters[1], s=500)
plt.title('Dataset 1')
plt.xlabel('t-SNE 1')
plt.ylabel('t-SNE 2')
plt.savefig('simulation_ds1.svg')

tsne.fit(datasets[2])
plot_clusters(tsne.embedding_, clusters[2], s=500)
plt.title('Dataset 2')
plt.xlabel('t-SNE 1')
def parse_log(regress_type, experiment, **kwargs):
    log_fname = ('iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    with open(log_fname) as f:
        while True:
            line = f.readline()
            if not line:
                break

            # Only parse timestamped log lines of the form "<date> | <message>".
            if not line.startswith('2019') and not line.startswith('2020'):
                continue
            if ' | ' not in line:
                continue
            line = line.split(' | ')[1]

            if line.startswith('Iteration'):
                iteration = int(line.strip().split()[-1])
                if iteration not in iter_to_Kds:
                    iter_to_Kds[iteration] = []
                if iteration not in iter_to_idxs:
                    iter_to_idxs[iteration] = []
                continue

            elif line.startswith('\tAcquire '):
                fields = line.strip().split()
                Kd = float(fields[-1])
                iter_to_Kds[iteration].append(Kd)
                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))
                iter_to_idxs[iteration].append((chem_idx, prot_idx))
                continue

    assert(iter_to_Kds.keys() == iter_to_idxs.keys())

    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.
    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kd_iter.append(np.mean(iter_to_Kds[iteration]))
        Kd_iter_max.append(max(iter_to_Kds[iteration]))
        Kd_iter_min.append(min(iter_to_Kds[iteration]))
        all_Kds += list(iter_to_Kds[iteration])
        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            break
    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(iterations, Kd_iter)
    plt.plot(iterations, Kd_iter)
    plt.fill_between(iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([ regress_type, experiment ]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    return  # The differential entropy analysis below is unreachable and never runs.

    # Plot differential entropy of acquired samples over iterations.
    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            d_entropies.append(float('nan'))
        else:
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())

    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([ regress_type, experiment ]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()