Пример #1
0
def test_approx(datasets_dimred, genes, labels, idx, distr, xlabels):
    """Score an ``approx=False`` assembly and save a comparison box plot.

    Runs ``assemble`` with ``approx=False``, concatenates the result, scores
    the rows selected by ``idx`` with ``sil`` and appends that score to the
    distributions already collected. All distributions are then drawn as a
    box plot saved to ``param_sensitivity_approx.svg``.

    Args:
        datasets_dimred: Datasets handed to ``assemble``; the value it
            returns must be accepted by ``np.concatenate``.
        genes: Not used by this function.
        labels: Array of labels, indexed with ``idx``.
        idx: Index selecting the rows of the data (and the matching labels)
            that are scored.
        distr: List of previously collected score distributions. Mutated in
            place: one entry is appended.
        xlabels: Tick labels, one per entry of ``distr``. Mutated in place.
    """
    integrated = assemble(datasets_dimred, approx=False)
    X = np.concatenate(integrated)
    distr.append(sil(X[idx, :], labels[idx]))
    xlabels.append('Exact NN')

    plt.figure()
    # whis=(0, 100) extends the whiskers to the data minimum and maximum.
    # It replaces the string form whis='range', which Matplotlib deprecated
    # and later removed; the percentile pair is accepted by old and new
    # releases alike.
    plt.boxplot(distr, showmeans=True, whis=(0, 100))
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('approx'))
Пример #2
0
def test_alpha(datasets_dimred, genes, labels, idx, distr, xlabels):
    """Sweep the ``alpha`` argument of ``assemble`` and save a box plot.

    For each value in ``[0, 0.05, 0.20, 0.30]`` the datasets are assembled
    with that ``alpha``, concatenated, and the rows selected by ``idx`` are
    scored with ``sil``. The scores are appended to ``distr`` and the
    resulting box plot is saved to ``param_sensitivity_alpha.svg``.

    Args:
        datasets_dimred: Datasets handed to ``assemble``; the value it
            returns must be accepted by ``np.concatenate``.
        genes: Not used by this function.
        labels: Array of labels, indexed with ``idx``.
        idx: Index selecting the rows of the data (and the matching labels)
            that are scored.
        distr: List of previously collected score distributions. Mutated in
            place: one entry is appended per ``alpha`` value.
        xlabels: Tick labels, one per entry of ``distr``. Mutated in place.
    """
    alphas = [0, 0.05, 0.20, 0.30]
    for alpha in alphas:
        integrated = assemble(datasets_dimred, alpha=alpha)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        xlabels.append(str(alpha))

    plt.figure()
    # whis=(0, 100) extends the whiskers to the data minimum and maximum.
    # It replaces the string form whis='range', which Matplotlib deprecated
    # and later removed; the percentile pair is accepted by old and new
    # releases alike.
    plt.boxplot(distr, showmeans=True, whis=(0, 100))
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('alpha'))
Пример #3
0
def test_sigma(datasets_dimred, genes, labels, idx, distr, xlabels):
    """Sweep the ``sigma`` argument of ``assemble`` and save a box plot.

    For each value in ``[10, 50, 100, 200]`` the datasets are assembled with
    that ``sigma``, concatenated, and the rows selected by ``idx`` are scored
    with ``sil``. The scores are appended to ``distr`` and the resulting box
    plot is saved to ``param_sensitivity_sigma.svg``.

    Args:
        datasets_dimred: Datasets handed to ``assemble``; the value it
            returns must be accepted by ``np.concatenate``.
        genes: Not used by this function.
        labels: Array of labels, indexed with ``idx``.
        idx: Index selecting the rows of the data (and the matching labels)
            that are scored.
        distr: List of previously collected score distributions. Mutated in
            place: one entry is appended per ``sigma`` value.
        xlabels: Tick labels, one per entry of ``distr``. Mutated in place.
    """
    sigmas = [10, 50, 100, 200]
    for sigma in sigmas:
        integrated = assemble(datasets_dimred, sigma=sigma)
        X = np.concatenate(integrated)
        distr.append(sil(X[idx, :], labels[idx]))
        xlabels.append(str(sigma))

    plt.figure()
    # whis=(0, 100) extends the whiskers to the data minimum and maximum.
    # It replaces the string form whis='range', which Matplotlib deprecated
    # and later removed; the percentile pair is accepted by old and new
    # releases alike.
    plt.boxplot(distr, showmeans=True, whis=(0, 100))
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('sigma'))
Пример #4
0
def test_learn_rate(datasets_dimred, genes, labels, idx, distr, xlabels):
    """Sweep the ``learn_rate`` argument of ``fit_tsne`` and save a box plot.

    The datasets are concatenated once; for each value in
    ``[50., 100., 500., 1000.]`` an embedding is computed with ``fit_tsne``
    and its rows selected by ``idx`` are scored with ``sil``. The scores are
    appended to ``distr`` and the resulting box plot is saved to
    ``param_sensitivity_learn_rate.svg``.

    Args:
        datasets_dimred: Sequence of arrays accepted by ``np.concatenate``.
        genes: Not used by this function.
        labels: Array of labels, indexed with ``idx``.
        idx: Index selecting the rows of the embedding (and the matching
            labels) that are scored.
        distr: List of previously collected score distributions. Mutated in
            place: one entry is appended per learning rate.
        xlabels: Tick labels, one per entry of ``distr``. Mutated in place.
    """
    # The input matrix does not depend on the swept parameter, so build it
    # once outside the loop.
    X = np.concatenate(datasets_dimred)

    learn_rates = [50., 100., 500., 1000.]
    for learn_rate in learn_rates:
        embedding = fit_tsne(X, learn_rate=learn_rate)
        distr.append(sil(embedding[idx, :], labels[idx]))
        xlabels.append(str(learn_rate))

    plt.figure()
    # whis=(0, 100) extends the whiskers to the data minimum and maximum.
    # It replaces the string form whis='range', which Matplotlib deprecated
    # and later removed; the percentile pair is accepted by old and new
    # releases alike.
    plt.boxplot(distr, showmeans=True, whis=(0, 100))
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('learn_rate'))
Пример #5
0
def test_perplexity(datasets_dimred, genes, labels, idx, distr, xlabels):
    """Sweep the ``perplexity`` argument of ``fit_tsne`` and save a box plot.

    The datasets are concatenated once; for each value in
    ``[10, 100, 500, 2000]`` an embedding is computed with ``fit_tsne`` and
    its rows selected by ``idx`` are scored with ``sil``. The scores are
    appended to ``distr`` and the resulting box plot is saved to
    ``param_sensitivity_perplexity.svg``.

    Args:
        datasets_dimred: Sequence of arrays accepted by ``np.concatenate``.
        genes: Not used by this function.
        labels: Array of labels, indexed with ``idx``.
        idx: Index selecting the rows of the embedding (and the matching
            labels) that are scored.
        distr: List of previously collected score distributions. Mutated in
            place: one entry is appended per perplexity value.
        xlabels: Tick labels, one per entry of ``distr``. Mutated in place.
    """
    # The input matrix does not depend on the swept parameter, so build it
    # once outside the loop.
    X = np.concatenate(datasets_dimred)

    perplexities = [10, 100, 500, 2000]
    for perplexity in perplexities:
        embedding = fit_tsne(X, perplexity=perplexity)
        distr.append(sil(embedding[idx, :], labels[idx]))
        xlabels.append(str(perplexity))

    plt.figure()
    # whis=(0, 100) extends the whiskers to the data minimum and maximum.
    # It replaces the string form whis='range', which Matplotlib deprecated
    # and later removed; the percentile pair is accepted by old and new
    # releases alike.
    plt.boxplot(distr, showmeans=True, whis=(0, 100))
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('perplexity'))
Пример #6
0
def test_dimred(datasets, genes, labels, idx, distr, xlabels):
    """Sweep the ``dimred`` argument of ``process_data`` and save a box plot.

    For each value in ``[10, 20, 50, 200, 6000]`` the datasets are processed
    with that ``dimred``, assembled with ``assemble``, concatenated, and the
    rows selected by ``idx`` are scored with ``sil``. The scores are appended
    to ``distr`` and the resulting box plot is saved to
    ``param_sensitivity_dimred.svg``.

    Args:
        datasets: Datasets handed to ``process_data``.
        genes: Gene list handed to ``process_data``. The local name is
            rebound to the gene list ``process_data`` returns, so each
            iteration receives the previous iteration's result.
        labels: Array of labels, indexed with ``idx``.
        idx: Index selecting the rows of the data (and the matching labels)
            that are scored.
        distr: List of previously collected score distributions. Mutated in
            place: one entry is appended per ``dimred`` value.
        xlabels: Tick labels, one per entry of ``distr``. Mutated in place.
    """
    dimreds = [10, 20, 50, 200, 6000]
    for dimred in dimreds:
        datasets_dimred, genes = process_data(datasets, genes, dimred=dimred)
        datasets_dimred = assemble(datasets_dimred)
        X = np.concatenate(datasets_dimred)
        distr.append(sil(X[idx, :], labels[idx]))
        xlabels.append(str(dimred))
    # The final setting (6000) is shown as 'No SVD' instead of its number.
    # NOTE(review): presumably 6000 is large enough that no reduction
    # happens -- confirm against process_data.
    xlabels[-1] = 'No SVD'

    plt.figure()
    # whis=(0, 100) extends the whiskers to the data minimum and maximum.
    # It replaces the string form whis='range', which Matplotlib deprecated
    # and later removed; the percentile pair is accepted by old and new
    # releases alike.
    plt.boxplot(distr, showmeans=True, whis=(0, 100))
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('dimred'))
Пример #7
0
    plt.xticks(range(1, len(xlabels) + 1), xlabels)
    plt.ylabel('Silhouette Coefficient')
    plt.ylim((-1, 1))
    plt.savefig('param_sensitivity_{}.svg'.format('learn_rate'))


if __name__ == '__main__':
    # Whitespace-separated dataset names.
    with open('conf/panorama.txt') as f:
        data_names = f.read().split()

    # Read the labels inside a context manager so the file handle is closed
    # as soon as the contents have been read.
    with open('data/cell_labels/all.txt') as f:
        labels = np.array(f.read().rstrip().split())
    # NOTE(review): np.random.choice is called without a `size`, so it
    # returns a single index rather than an array of indices. Confirm
    # whether a larger sample (a `size=` argument) was intended.
    idx = np.random.choice(labels.shape[0], replace=False)

    # scran MNN baseline.
    X = np.loadtxt('data/corrected_mnn.txt')
    sil_mnn = sil(X[idx, :], labels[idx])
    print(np.median(sil_mnn))

    # Seurat CCA baseline.
    X = np.loadtxt('data/corrected_seurat.txt')
    sil_cca = sil(X[idx, :], labels[idx])
    print(np.median(sil_cca))

    # Load, merge and process the datasets named in the configuration file.
    datasets, genes_list, n_cells = load_names(data_names)
    datasets, genes = merge_datasets(datasets, genes_list)
    datasets_dimred, genes = process_data(datasets, genes)

    # Baseline without correction.
    X = np.concatenate(datasets_dimred)
    sil_non = sil(X[idx, :], labels[idx])
    print(np.median(sil_non))