Exemplo n.º 1
0
# Turn the collected uncorrected values into an array so the comparisons
# below are element-wise.
uncorrected = np.array(uncorrected)

# Count the points lying more than 50 units away from the diagonal. With
# scanorama on the x-axis and uncorrected on the y-axis (see the scatter
# call below), a larger scanorama value puts a point below the line.
is_below = scanorama > uncorrected + 50
is_above = scanorama < uncorrected - 50
below = sum(is_below)
above = sum(is_above)

# Share of the off-diagonal points that sit above the line, as a percentage.
frac_above = float(above) / float(above + below)
print('{}% above line'.format(frac_above * 100))

# First command-line argument, cut at its first '.'.
name = sys.argv[1].partition('.')[0]

# Upper end of the diagonal: the smaller of the two maxima, but never less
# than 2100.
smaller_peak = min(max(scanorama), max(uncorrected))
line = max(smaller_peak, 2100)

from scipy.stats import pearsonr

print(pearsonr(scanorama, uncorrected))

# Scatter of scanorama against uncorrected with a dashed red diagonal.
diagonal = [0, line]
plt.figure()
plt.scatter(scanorama, uncorrected, s=10)
plt.plot(diagonal, diagonal, 'r--')
plt.xlim([0, 2100])
plt.tight_layout()
plt.savefig('oneway_scanorama.png')

# scran MNN.

# Pair up the values of every gene present in both `mnn` and `uncor`; both
# arrays are built from the same set object so their entries stay aligned.
shared_genes = set(mnn.keys()) & set(uncor.keys())
scranmnn = np.array([mnn[gene] for gene in shared_genes])
uncorrected = np.array([uncor[gene] for gene in shared_genes])

below = sum(scranmnn > uncorrected + 50)
Exemplo n.º 2
0
from scanorama import plt
plt.rcParams['font.size'] = 25
import seaborn as sns

# One (data set size, time in seconds) pair per measurement.
measurements = [
    (4185, 13.6908118724823),
    (68579, 50.770941495895386),
    (465281, 483.3363349437714),
    (665858, 617.4005923271179),
]
sizes, times = (list(column) for column in zip(*measurements))

plt.figure()
# Dashed reference line running from the smallest to the largest size.
plt.plot([sizes[0], sizes[-1]], [30, 630], '--')
plt.scatter(sizes, times)
# Put one tick at every measured size, tilted so the labels do not collide.
plt.xticks(sizes, rotation=30)
plt.xlabel('Data set size')
plt.ylabel('Time (seconds)')
plt.savefig('svd.svg')
Exemplo n.º 3
0
            # Begin a new, empty list of entropy values.
            entropies = []
        # Treat this line as the name of the method whose values follow,
        # then move on to the next line.
        curr_method = line
        continue

    # fields[2], with any trailing commas removed, is read as the integer k.
    k = int(fields[2].rstrip(','))
    #if k > 15:
    #    continue

    ks.append(k)
    # The last field is read as the entropy value recorded for this k.
    entropies.append(float(fields[-1]))

# Store the list collected for the method named by curr_method.
# NOTE(review): ks receives one entry per data line and is later plotted
# against each method's list, so it has to be reset per method somewhere
# outside this excerpt for the lengths to match -- confirm.
data[curr_method] = entropies

plt.figure()

# Draw one connected, point-marked curve per method.
for method, method_entropies in data.items():
    # Build the legend label: capitalise the method name, then apply the
    # display substitutions one after another, in this order.
    # NOTE(review): capitalize() runs first, so a name that *starts* with
    # 'geosketch' or 'srs' no longer matches the lower-case pattern and is
    # shown as 'Geosketch' / 'Srs' -- confirm that this is intended.
    label = method.capitalize()
    for raw, shown in (('geosketch', 'GeoSketch'),
                       ('srs', 'SRS'),
                       ('uniform', 'Uniform'),
                       ('_', ' + ')):
        label = label.replace(raw, shown)

    plt.plot(ks, method_entropies, label=label)
    plt.scatter(ks, method_entropies)

plt.legend()
plt.xlabel('k-means, number of clusters')
plt.ylabel('Data set mixing (average normalized entropy)')
plt.ylim([-0.1, 1.05])
plt.savefig('entropies.svg')
Exemplo n.º 4
0
def _stat_series(per_n, stat, dtype, max_N):
    """Summarise the recorded values of `stat` at each sample size.

    `per_n` maps a sample size to a list of stat dictionaries. Sample sizes
    larger than `max_N` (when it is not None) and sample sizes at which no
    dictionary contains `stat` are left out.

    Returns three equally long arrays ordered by increasing sample size:
    the sample sizes, the mean of the values at each size, and the result
    of `ss.sem` over the values at each size.
    """
    Ns, means, sems = [], [], []
    for N in per_n:
        if max_N is not None and N > max_N:
            continue
        stat_vals = [
            dtype(stat_dict[stat]) for stat_dict in per_n[N]
            if stat in stat_dict
        ]
        if not stat_vals:
            continue
        Ns.append(N)
        means.append(np.mean(stat_vals))
        sems.append(ss.sem(stat_vals))

    order = np.argsort(Ns)
    return np.array(Ns)[order], np.array(means)[order], np.array(sems)[order]


def plot_stats(stat,
               samp_fns=None,
               fname=None,
               dtype=float,
               only_fns=None,
               only_replace=None,
               max_N=None):
    """Plot the mean of `stat` against sample size, one curve per sampler.

    Each curve is drawn as a line with point markers and a shaded band of
    mean -/+ `ss.sem` around it. The figure is written to
    'target/stats_plots/<namespace>_<stat>[_replace<only_replace>].svg'
    and closed afterwards.

    Parameters:
        stat: key looked up in every stat dictionary; also the y-axis label.
        samp_fns: mapping keyed by (sampler name, replace) tuples whose
            values map a sample size to a list of stat dictionaries. It
            must also hold a ('_namespace', None) entry, used in the title.
            When None, it is obtained from `parse_stats(fname)`.
        fname: passed to `parse_stats` when `samp_fns` is None.
        dtype: callable applied to every raw stat value before averaging.
        only_fns: when not None, only sampler names contained in it are
            plotted.
        only_replace: when not None, only entries whose `replace` equals it
            are plotted, and it is appended to the title.
        max_N: when not None, sample sizes above it are ignored.

    Raises:
        ValueError: if both `samp_fns` and `fname` are None.
        KeyError: if `samp_fns` has no ('_namespace', None) entry.
    """
    if samp_fns is None:
        # Raised explicitly instead of asserted: asserts vanish under -O,
        # which would hand None to parse_stats.
        if fname is None:
            raise ValueError(
                'plot_stats needs either samp_fns or fname to be given')
        samp_fns = parse_stats(fname)

    colors = [
        '#377eb8',
        '#ff7f00',
        '#4daf4a',
        '#984ea3',
        '#999999',
        '#e41a1c',
        '#dede00',
        '#ffe119',
        '#e6194b',
        '#ffbea3',
        '#911eb4',
        '#46f0f0',
        '#f032e6',
        '#d2f53c',
        '#008080',
        '#e6beff',
        '#aa6e28',
        '#800000',
        '#aaffc3',
        '#808000',
        '#ffd8b1',
        '#000080',
        '#808080',
        '#fabebe',
        '#a3f4ff',
    ]

    fig = plt.figure()
    try:
        # Advanced only for samplers that are actually drawn, so colours
        # are handed out consecutively and wrap around when exhausted.
        c_idx = 0

        for samp_fn, replace in sorted(
                samp_fns, key=lambda x: '{}_{}'.format(*x)):

            # Keys starting with '_' (such as '_namespace') are not samplers.
            if samp_fn.startswith('_'):
                continue
            if only_fns is not None and samp_fn not in only_fns:
                continue
            if only_replace is not None and replace != only_replace:
                continue

            Ns, means, sems = _stat_series(samp_fns[(samp_fn, replace)],
                                           stat, dtype, max_N)

            label = '{}_{}'.format(samp_fn, replace)
            color = colors[c_idx]

            plt.plot(Ns, means, color=color, label=label)
            plt.scatter(Ns, means, color=color)
            plt.fill_between(Ns,
                             means - sems,
                             means + sems,
                             alpha=0.3,
                             color=color)

            c_idx = (c_idx + 1) % len(colors)

        namespace = samp_fns[('_namespace', None)]
        title = '{}_{}'.format(namespace, stat)
        if only_replace is not None:
            title += '_replace{}'.format(only_replace)

        plt.title(title)
        plt.xlabel('Sample size')
        plt.ylabel(stat)
        plt.legend()
        mkdir_p('target/stats_plots')
        plt.savefig('target/stats_plots/{}.svg'.format(title))
    finally:
        # pyplot keeps every figure alive until it is closed; release this
        # one so repeated calls do not pile up open figures.
        plt.close(fig)