示例#1
0
def visualize(assembled,
              labels,
              namespace,
              data_names,
              gene_names=None,
              gene_expr=None,
              genes=None,
              n_iter=N_ITER,
              perplexity=PERPLEXITY,
              verbose=VERBOSE,
              learn_rate=200.,
              early_exag=12.,
              embedding=None,
              size=1):
    """Embed the assembled datasets with t-SNE and write the resulting plots.

    Args:
        assembled: Sequence of arrays that is concatenated and fed to t-SNE
            when no precomputed ``embedding`` is given.
        labels: Per-row labels; must support indexing with a list of
            integers (presumably a NumPy array -- confirm against callers).
        namespace: Prefix used for the output file names.
        data_names: One name per cluster/dataset; each gets its own plot.
        gene_names: Optional names of the genes whose expression is plotted.
        gene_expr: Optional expression matrix, indexed by row like
            ``embedding``.
        genes: Optional gene list forwarded to ``visualize_expr``.
        n_iter, perplexity, verbose, learn_rate, early_exag: t-SNE settings.
        embedding: Optional precomputed embedding; skips the t-SNE fit.
        size: Marker size forwarded to the plotting helpers.

    Returns:
        The embedding with its rows in the shuffled order used for plotting
        (not the input row order).
    """
    # Fit t-SNE.
    if embedding is None:
        tsne = TSNEApprox(n_iter=n_iter,
                          perplexity=perplexity,
                          verbose=verbose,
                          random_state=69,
                          learning_rate=learn_rate,
                          early_exaggeration=early_exag)
        tsne.fit(np.concatenate(assembled))
        embedding = tsne.embedding_

    # Shuffle the rows so the draw order is random.  random.shuffle() needs
    # a mutable sequence, so the range must be turned into a list first
    # (a bare range raises TypeError on Python 3).
    rand_idx = list(range(embedding.shape[0]))
    random.shuffle(rand_idx)
    embedding = embedding[rand_idx, :]
    labels = labels[rand_idx]

    # Plot clusters together.
    plot_clusters(embedding, labels, s=size)
    plt.title(('Panorama ({} iter, perplexity: {}, sigma: {}, ' +
               'knn: {}, hvg: {}, dimred: {}, approx: {})').format(
                   n_iter, perplexity, SIGMA, KNN, HVG, DIMRED, APPROX))
    plt.savefig(namespace + '.svg', dpi=500)

    # Plot clusters individually.
    for i, data_name in enumerate(data_names):
        visualize_cluster(embedding,
                          i,
                          labels,
                          cluster_name=data_name,
                          size=size,
                          viz_prefix=namespace)

    # Plot gene expression levels (only when all three inputs are given).
    if (gene_names is not None
            and gene_expr is not None
            and genes is not None):
        # Apply the same row permutation as the embedding.
        gene_expr = gene_expr[rand_idx, :]
        for gene_name in gene_names:
            visualize_expr(gene_expr,
                           embedding,
                           genes,
                           gene_name,
                           size=size,
                           viz_prefix=namespace)

    return embedding
def plot_batch(df, batch):
    """Plot fluorescence per compound and per concentration for one batch.

    Args:
        df: Table with ``conc``, ``comp``, ``fluo`` and ``control`` columns
            (presumably a pandas DataFrame -- it is indexed with boolean
            masks and handed to seaborn).
        batch: Batch name.  For names starting with ``'Ala'`` one-sided
            t-test P-values against DMSO are printed.  Figures are only
            produced for ``'AlaA'`` and ``'AlaB'``; any other batch returns
            before plotting.

    Returns:
        None.  Figures are written under ``figures/``.
    """
    # Plot 50uM (encoded as conc == -3; see the x tick labels below).
    df_50uM = df[df.conc == -3]

    if batch.startswith('Ala'):
        df_dmso = df_50uM[df_50uM.comp == 'DMSO']
        for comp in ['K252a', 'SU11652', 'TG101209', 'RIF', 'IKK16']:
            df_comp = df_50uM[df_50uM.comp == comp]
            t, p_2side = ss.ttest_ind(df_comp.fluo, df_dmso.fluo)
            # Convert to a one-sided P-value for "compound < DMSO".
            p_1side = p_2side / 2. if t < 0 else 1. - (p_2side / 2.)
            print('{}, one-sided t-test P = {}, n = {}'
                  .format(comp, p_1side, len(df_comp)))

    if batch == 'AlaA':
        order = ['K252a', 'SU11652', 'TG101209', 'RIF', 'DMSO']
    elif batch == 'AlaB':
        order = ['IKK16', 'K252a', 'RIF', 'DMSO']
    else:
        return

    # Only 'AlaA'/'AlaB' reach this point, so the linear y axis always
    # applies; the former log-scale branches were unreachable.
    plt.figure()
    sns.barplot(x='comp', y='fluo', data=df_50uM, ci=95, dodge=False,
                hue='control', palette=sns.color_palette("RdBu_r", 7),
                order=order, capsize=0.2, errcolor='#888888',)
    sns.swarmplot(x='comp', y='fluo', data=df_50uM, color='black',
                  order=order)
    plt.savefig('figures/tb_culture_50uM_{}.svg'.format(batch))
    plt.close()

    # Plot dose-response, one panel per compound.
    plt.figure(figsize=(24, 6))
    for cidx, comp in enumerate(order):
        df_subset = df[df.comp == comp]

        plt.subplot(1, 5, cidx + 1)
        sns.lineplot(x='conc', y='fluo', data=df_subset, ci=95,)
        sns.scatterplot(x='conc', y='fluo', data=df_subset,
                        color='black',)
        plt.title(comp)
        plt.ylim([0., 1.3])
        # Encoded concentrations -3, -4, -5 are labelled 50, 25, 10.
        plt.xticks(list(range(-3, -6, -1)),
                   ['50', '25', '10'])

    plt.savefig('figures/tb_culture_{}.svg'.format(batch))
    plt.close()
def acquisition_scatter(y_unk_pred, var_unk_pred, acquisition, regress_type):
    """Scatter predicted score against variance, coloured by acquisition.

    Args:
        y_unk_pred: Predicted scores; values above 10000 are clipped to
            10000 for plotting.  The caller's array is left untouched.
        var_unk_pred: Predicted variances, same length as ``y_unk_pred``.
        acquisition: Acquisition values; negated and used as point colours.
        regress_type: Name used for the title and the output file name.

    Returns:
        None.  The figure is written to
        ``figures/acquisition_unknown_<regress_type>.png``.
    """
    # A `[:]` slice of a NumPy array is a view, so clipping through it would
    # overwrite the caller's predictions.  Take a real copy instead.
    y_unk_pred = y_unk_pred.copy()
    y_unk_pred[y_unk_pred > 10000] = 10000

    plt.figure()
    plt.scatter(y_unk_pred, var_unk_pred, alpha=0.5, c=-acquisition,
                cmap='hot')
    plt.title(regress_type.title())
    plt.xlabel('Predicted score')
    plt.ylabel('Variance')
    plt.savefig('figures/acquisition_unknown_{}.png'
                .format(regress_type), dpi=200)
    plt.close()
def expo(args):
    """Plot model score maps over a 2-D grid for sigma scaled by 1 to 4.

    For each multiplier a copy of ``args`` with the scaled ``sigma`` is used
    to load a checkpointed model (keyed by ``hash_args``).  When nothing
    usable is loaded, the model is produced by ``main`` and saved.  Every
    score returned by ``inference`` is drawn with ``plot_pcolormesh`` and
    flushed to ``<output_dir>/N(<radius>, <sigma>)_<key>.png``.

    Args:
        args: Settings object; ``radius``, ``sigma``, ``num_points``,
            ``work_dir`` and ``output_dir`` are read here.
    """
    half_width = 5 * args.radius
    linspace, data = SyntheticDataset.grid_data(args.num_points,
                                                length=half_width)

    plt.xlim(-1 * half_width, half_width)
    plt.ylim(-1 * half_width, half_width)

    for multiplier in tqdm([1, 2, 3, 4]):
        scaled_sigma = multiplier * args.sigma

        # Work on a copy so the caller's args keep their original sigma.
        scale_args = deepcopy(args)
        scale_args.sigma = scaled_sigma
        base_name = 'N({}, {})'.format(scale_args.radius, scale_args.sigma)

        saver = Saver(os.path.join(args.work_dir, 'checkpoints'))
        model = saver.load(hash_args(scale_args))
        if not model:
            # Nothing usable was loaded: build the model and checkpoint it.
            model = main(scale_args)['model']
            saver.save(hash_args(scale_args), model)

        with torch.no_grad():
            scores = inference(model, data)
            points = data.cpu().numpy()
            for key in scores:
                score_map = scores[key].cpu().numpy()
                plot_pcolormesh(points, linspace, score_map)
                plot_name = '{}_{}'.format(base_name, key)
                plt.title(plot_name)
                flush_plot(plt,
                           os.path.join(args.output_dir, plot_name) + '.png')
    masks.shape[0] * masks.shape[1] * masks.shape[2] * masks.shape[3], 1)

# Binarise predictions and ground truth at 0.5.
y_scores = np.where(y_scores > 0.5, 1, 0)
y_true = np.where(y_true > 0.5, 1, 0)

import os
# makedirs(..., exist_ok=True) keeps reruns from failing with
# FileExistsError when the folder is already there.
os.makedirs('./output', exist_ok=True)
output_folder = 'output/'

#Area under the ROC curve
fpr, tpr, thresholds = roc_curve((y_true), y_scores)
AUC_ROC = roc_auc_score(y_true, y_scores)
print("\nArea under the ROC curve: " + str(AUC_ROC))
# NOTE(review): this rebinds the name `roc_curve` from the function called
# above to a Figure, so calling roc_curve() again in the same session will
# fail.  The name is kept because later code outside this view may use it.
roc_curve = plt.figure()
plt.plot(fpr, tpr, '-', label='Area Under the Curve (AUC = %0.4f)' % AUC_ROC)
plt.title('ROC curve')
plt.xlabel("FPR (False Positive Rate)")
plt.ylabel("TPR (True Positive Rate)")
plt.legend(loc="lower right")
plt.savefig(output_folder + "ROC.png")

#Precision-recall curve
precision, recall, thresholds = precision_recall_curve(y_true, y_scores)
# Reverse both arrays before integrating with the trapezoidal rule.
precision = np.fliplr([precision])[0]
recall = np.fliplr([recall])[0]
AUC_prec_rec = np.trapz(precision, recall)
print("\nArea under Precision-Recall curve: " + str(AUC_prec_rec))
prec_rec_curve = plt.figure()
plt.plot(recall,
         precision,
         '-',
                order_list = [
                    order
                    for order, _ in sorted(order_list, key=lambda x: x[1])
                ]

                plt.subplot(1, 3, bidx + 1)
                sns.barplot(
                    x='order',
                    y='Kdpoint',
                    data=df_subset,
                    color=palette[bidx],
                    order=order_list,
                    ci=95,
                    capsize=0.4,
                    errcolor='#888888',
                )
                sns.swarmplot(
                    x='order',
                    y='Kdpoint',
                    data=df_subset,
                    color='black',
                    order=order_list,
                )
                plt.ylim([-100, 10100])
                plt.title('{} {}'.format(kinase, model))

            plt.savefig('figures/prediction_barplot_{}_{}.svg'.format(
                kinase, model))
            plt.close()
示例#7
0
    datasets, genes_list, n_cells = load_names(data_names)
    #datasets, genes = merge_datasets(datasets, genes_list)
    #datasets_dimred, genes = process_data(datasets, genes, hvg=hvg)
    datasets, genes = correct(datasets, genes_list)
    X = np.concatenate(datasets)
    X[X < 0] = 0

    cell_labels = (
        open('data/cell_labels/pancreas_cluster.txt').read().rstrip().split())
    er_idx = [i for i, cl in enumerate(cell_labels) if cl == 'beta_er']
    beta_idx = [i for i, cl in enumerate(cell_labels) if cl == 'beta']

    gadd_idx = list(genes).index('GADD45A')
    herp_idx = list(genes).index('HERPUD1')

    plt.figure()
    plt.boxplot([X[er_idx, gadd_idx], X[beta_idx, gadd_idx]], showmeans=True)
    plt.title('GADD45A (p < {})'.format(
        ttest_ind(X[er_idx, gadd_idx], X[beta_idx, gadd_idx])[1]))
    plt.xticks([1, 2], ['beta_er', 'beta'])
    plt.ylabel('Scaled gene expression')
    plt.savefig('er_stress_GADD45A.svg')

    plt.figure()
    plt.boxplot([X[er_idx, herp_idx], X[beta_idx, herp_idx]], showmeans=True)
    plt.title('HERPUD1 (p < {})'.format(
        ttest_ind(X[er_idx, herp_idx], X[beta_idx, herp_idx])[1]))
    plt.xticks([1, 2], ['beta_er', 'beta'])
    plt.ylabel('Scaled gene expression')
    plt.savefig('er_stress_HERPUD1.svg')
示例#8
0
if __name__ == '__main__':
    # Compare silhouette coefficients of three integration methods on a
    # random 20000-row subsample of each embedding.

    # Read the whitespace-separated labels; `with` closes the file handle.
    with open('data/cell_labels/all.txt') as label_file:
        labels = np.array(label_file.read().rstrip().split())

    # (display name, embedding file), in the order they are plotted.
    embedding_files = [
        ('Scanorama', 'data/panorama_embedding.txt'),
        ('scran MNN', 'data/mnn_embedding.txt'),
        ('Seurat CCA', 'data/cca_embedding.txt'),
    ]

    silhouettes = []
    for _, embedding_file in embedding_files:
        X = np.loadtxt(embedding_file)
        idx = np.random.choice(X.shape[0], size=20000, replace=False)
        scores = sil(X[idx, :], labels[idx])
        print(np.median(scores))
        silhouettes.append(scores)

    sil_pan, sil_mnn, sil_cca = silhouettes

    # Test Scanorama against each of the other two methods.
    print(ttest_ind(sil_pan, sil_mnn))
    print(ttest_ind(sil_pan, sil_cca))

    plt.figure()
    plt.boxplot([sil_pan, sil_mnn, sil_cca], showmeans=True)
    plt.title('Distributions of Silhouette Coefficients')
    plt.xticks([1, 2, 3], [name for name, _ in embedding_files])
    plt.ylabel('Silhouette Coefficient')
    plt.savefig('silhouette.svg')
示例#9
0
    # Project to higher dimension.
    Z = np.absolute(np.random.randn(2, 100))
    datasets = [np.dot(s, Z) for s in samples]

    # Add batch effect "noise."
    datasets = [ds + np.random.randn(1, 100) for ds in datasets]

    # Normalize datasets.
    datasets = [normalize(ds, axis=1) for ds in datasets]

    tsne = TSNE(n_iter=400, perplexity=100, verbose=2, random_state=69)

    tsne.fit(np.concatenate(datasets[1:]))
    plot_clusters(tsne.embedding_, np.concatenate(clusters[1:]), s=500)
    plt.title('Uncorrected data')
    plt.savefig('simulation_uncorrected.svg')

    # Assemble datasets.
    assembled = assemble(datasets[1:], verbose=1, sigma=1, knn=10, approx=True)
    tsne.fit(datasets[1])
    plot_clusters(tsne.embedding_, clusters[1], s=500)
    plt.title('Dataset 1')
    plt.xlabel('t-SNE 1')
    plt.ylabel('t-SNE 2')
    plt.savefig('simulation_ds1.svg')

    tsne.fit(datasets[2])
    plot_clusters(tsne.embedding_, clusters[2], s=500)
    plt.title('Dataset 2')
    plt.xlabel('t-SNE 1')
示例#10
0
def parse_log(regress_type, experiment, **kwargs):
    """Parse an acquisition log and plot the acquired Kd per iteration.

    Reads ``iterate_davis2011kinase_<regress_type>_<experiment>.log``.  Only
    lines that start with ``2019`` or ``2020`` and contain ``' | '`` are
    considered; the message is the text after the first ``' | '``:

    * ``Iteration ... <n>`` starts iteration ``n``.
    * ``\\tAcquire (<chem_idx>, <prot_idx>) ... <Kd>`` records one
      acquisition for the current iteration.

    The mean Kd per iteration, with a min/max band, is written to
    ``figures/Kd_over_iterations_<regress_type>_<experiment>.png``.  For the
    ``'perprot'`` experiment the summary stops after the first iteration
    greater than 4.

    Args:
        regress_type: Regressor name used in the log and figure file names.
        experiment: Experiment name used in the log and figure file names.
        **kwargs: Only read by the disabled entropy plot at the end
            (``chems``, ``prots``, ``chem2feature``, ``prot2feature``).

    Returns:
        None.
    """
    log_fname = ('iterate_davis2011kinase_{}_{}.log'.format(
        regress_type, experiment))

    iteration = 0
    iter_to_Kds = {}
    iter_to_idxs = {}

    with open(log_fname) as f:
        for line in f:
            if not line.startswith(('2019', '2020')):
                continue
            if ' | ' not in line:
                continue

            line = line.split(' | ')[1]

            if line.startswith('Iteration'):
                iteration = int(line.strip().split()[-1])
                iter_to_Kds.setdefault(iteration, [])
                iter_to_idxs.setdefault(iteration, [])

            elif line.startswith('\tAcquire '):
                fields = line.strip().split()

                Kd = float(fields[-1])
                chem_idx = int(fields[1].lstrip('(').rstrip(','))
                prot_idx = int(fields[2].strip().rstrip(')'))

                # setdefault keeps an acquisition logged before any
                # 'Iteration' line (filed under iteration 0) from raising
                # KeyError.
                iter_to_Kds.setdefault(iteration, []).append(Kd)
                iter_to_idxs.setdefault(iteration, []).append(
                    (chem_idx, prot_idx))

    assert (iter_to_Kds.keys() == iter_to_idxs.keys())
    iterations = sorted(iter_to_Kds.keys())

    # Plot Kd over iterations.

    # The loop below may stop early, so track which iterations were actually
    # summarised; plotting against the full `iterations` list would fail on
    # mismatched x/y lengths.
    plotted_iterations = []
    Kd_iter, Kd_iter_max, Kd_iter_min = [], [], []
    all_Kds = []
    for iteration in iterations:
        Kds = iter_to_Kds[iteration]
        plotted_iterations.append(iteration)
        Kd_iter.append(np.mean(Kds))
        Kd_iter_max.append(max(Kds))
        Kd_iter_min.append(min(Kds))
        all_Kds.extend(Kds)

        if iteration == 0:
            print('First average Kd is {}'.format(Kd_iter[0]))
        elif iteration > 4 and experiment == 'perprot':
            break

    print('Average Kd is {}'.format(np.mean(all_Kds)))

    plt.figure()
    plt.scatter(plotted_iterations, Kd_iter)
    plt.plot(plotted_iterations, Kd_iter)
    plt.fill_between(plotted_iterations, Kd_iter_min, Kd_iter_max, alpha=0.3)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/Kd_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()

    # NOTE(review): the unconditional return below disables the entropy
    # plot; everything after it is unreachable.  It is left in place because
    # it looks intentionally switched off -- confirm before deleting or
    # re-enabling it.
    return

    # Plot differential entropy of acquired samples over iterations.

    chems = kwargs['chems']
    prots = kwargs['prots']
    chem2feature = kwargs['chem2feature']
    prot2feature = kwargs['prot2feature']

    d_entropies = []
    X_acquired = []
    for iteration in iterations:
        for i, j in iter_to_idxs[iteration]:
            chem = chems[i]
            prot = prots[j]
            X_acquired.append(chem2feature[chem] + prot2feature[prot])
        if len(X_acquired) <= 1:
            d_entropies.append(float('nan'))
        else:
            gaussian = GaussianMixture().fit(np.array(X_acquired))
            gaussian = multivariate_normal(gaussian.means_[0],
                                           gaussian.covariances_[0])
            d_entropies.append(gaussian.entropy())

    print('Final differential entropy is {}'.format(d_entropies[-1]))

    plt.figure()
    plt.scatter(iterations, d_entropies)
    plt.plot(iterations, d_entropies)
    plt.viridis()
    plt.title(' '.join([regress_type, experiment]))
    plt.savefig('figures/entropy_over_iterations_{}_{}.png'.format(
        regress_type, experiment))
    plt.close()