示例#1
0
文件: grouping.py 项目: lcurley/PING
def plot_stat_distributions(stats, group_names):
    """Plot distributions of pairwise group-comparison statistics.

    Used to eyeball whether there are reliable group differences.
    Two figures are created:

    1. A 2 x n_pairs grid of p-value histograms. The first row (labelled
       'mean') reads column 1 of every entry of ``stats``; the second row
       (labelled 'var') reads column 3. One subplot per group pair.
    2. Two side-by-side histograms of column 0 ('mean') and column 2
       ('standard deviation') of every entry of ``stats``, with one
       dataset per group pair. Entries containing a NaN in either column
       are left out.

    Parameters
    ----------
    stats : sequence of 2-D arrays
        Each entry is indexed as ``[pair, column]`` and needs at least
        four columns.
        NOTE(review): presumably (mean stat, mean p-value, variance stat,
        variance p-value) per pair -- confirm against the producer.
    group_names : sequence of str
        Names of the groups; pairs are enumerated as every group against
        each later group, in order.
    """
    pair_labels = ['%s vs. %s' % (first, second)
                   for idx, first in enumerate(group_names)
                   for second in group_names[idx + 1:]]
    n_pairs = len(pair_labels)

    # Figure 1: p-value histograms, one row per statistic, one column per pair.
    pval_bins = [0.0001, 0.001, 0.01, 0.05, 0.10,
                 0.20, 0.30, 0.40, 0.50, 0.60]
    pval_fig = plt.figure(figsize=(18, 8))
    for row, stat_name in enumerate(['mean', 'var']):
        # p-values sit in the odd columns (1 and 3).
        pvals = np.asarray([ss[:, row * 2 + 1] for ss in stats])
        for col, label in enumerate(pair_labels):
            ax = pval_fig.add_subplot(2, n_pairs, row * n_pairs + col + 1)
            plot_normalized_hist(pvals[:, col], ax=ax, bins=pval_bins)
            ax.set_title(label)
            if col == 0:
                ax.set_ylabel(stat_name)
    equalize_xlims(pval_fig)
    equalize_ylims(pval_fig)

    # Figure 2: distribution of the statistics themselves.
    means = np.asarray([ss[:, 0] for ss in stats])
    stds = np.asarray([ss[:, 2] for ss in stats])
    # A NaN anywhere in a row makes the row sum NaN; drop those rows.
    good_idx = ~np.logical_or(np.isnan(means.sum(1)), np.isnan(stds.sum(1)))

    fh = plt.figure(figsize=(18, 8))
    ax1 = fh.add_subplot(1, 2, 1)
    plot_normalized_hist(means[good_idx], ax=ax1, bins=25)
    ax1.set_xlabel('mean')

    ax2 = fh.add_subplot(1, 2, 2)
    plot_normalized_hist(stds[good_idx], ax=ax2, bins=25)
    ax2.set_xlabel('standard deviation')
    # Columns of the plotted arrays are group *pairs*, so label them as such.
    ax2.legend(pair_labels)
示例#2
0
文件: grouping.py 项目: lcurley/PING
def compare_group_asymmetry(data, xaxis_key, yaxis_key, grouping_keys, plots, measure_key):
    """Group the data, compute the asymmetry index per group, and plot it.

    For every group returned by ``get_groupings`` the x values and the
    asymmetry index are selected, NaN samples are dropped, and a linear
    regression of asymmetry on x is computed. Every pair of groups is then
    compared with an independent t-test and a Levene test.

    Parameters
    ----------
    data : mapping
        Indexed by key; ``data[xaxis_key]`` must provide ``.tolist()``.
        Also handed to ``get_groupings`` and ``get_asymmetry_index``.
    xaxis_key : str
        Key of the regressor (x axis).
    yaxis_key : str
        Key passed to ``get_asymmetry_index``; also used in the title of
        the distributions figure.
    grouping_keys
        Passed through to ``get_groupings``.
    plots : container of str
        Which figures to draw: any of 'regressions', 'distributions',
        'stats'.
    measure_key : str
        Title of the regression axes.

    Returns
    -------
    group_names
        Group names as returned by ``get_groupings``.
    stats : ndarray, n_compares x 4
        Per group pair: 0: t statistic, 1: t-test p-value,
        2: Levene statistic, 3: Levene p-value.
    regressions : ndarray
        One row per group holding the ``scipy.stats.linregress`` result.
    group_samples : list of ndarray
        NaN-filtered asymmetry values, one array per group.
    """
    group_names, grouping_index = get_groupings(data, grouping_keys)

    x_data = np.asarray(data[xaxis_key].tolist())
    prop_ai = get_asymmetry_index(data, yaxis_key)

    # All groups share a single row of subplots.
    n_rows = 1
    n_cols = len(group_names)
    ai_label = 'Asymmetry Index (LH - RH)'

    show_regressions = 'regressions' in plots
    show_distributions = 'distributions' in plots

    if show_regressions:
        fh1 = plt.figure(figsize=(18, 7))
    if show_distributions:
        fh2 = plt.figure(figsize=(18, 7))
        fh2.suptitle('%s distributions' % yaxis_key)
        n_bins = 15
        valid_ai = prop_ai[~np.isnan(prop_ai)]
        # Interior edges only: the two extreme edges are dropped.
        bins = np.linspace(valid_ai.min(), valid_ai.max(), n_bins + 2)[1:-1]

    group_samples = []
    regressions = []
    for gi, group_name in enumerate(group_names):

        # Index the current group
        if group_name == 'all':
            # Sized from x_data (the array being indexed) rather than
            # data.values()[0], which is not subscriptable on Python 3.
            idx = np.ones((len(x_data),), dtype=bool)
        else:
            idx = grouping_index[gi]

        # Select data within the group
        cur_x = x_data[idx]
        group_ai = prop_ai[idx]

        # Remove bad data
        bad_idx = np.logical_or(np.isnan(cur_x), np.isnan(group_ai))
        good_idx = np.logical_not(bad_idx)
        cur_x = cur_x[good_idx]
        group_ai = group_ai[good_idx]

        group_samples.append(group_ai)
        regressions.append(scipy.stats.linregress(cur_x, group_ai))

        title = 'Group: %s (n=%d)' % (group_name, len(group_ai))

        # Plot the regression result
        if show_regressions:
            params = dict(xlabel=xaxis_key, ylabel=ai_label, title=title)
            if gi > 0:
                # Every group shares one axes; label the y axis only once.
                del params['ylabel']
            ax1 = fh1.gca()
            do_and_plot_regression(cur_x, group_ai, ax=ax1, colori=gi,
                                   show_std=(len(cur_x) > 200), **params)
            ax1.set_title(measure_key)

        # Plot the distribution result
        if show_distributions:
            ax2 = fh2.add_subplot(n_rows, n_cols, gi + 1)
            plot_normalized_hist(group_ai, ax2, bins=bins)
            ax2.set_title(title)
            # Use the label directly: the regression kwargs may have had
            # their 'ylabel' entry removed for this group.
            ax2.set_xlabel(ai_label)
            ax2.set_ylim([0, 0.25])
    regressions = np.asarray(regressions)

    # stats[:, n]: 0: t statistic, 1: t-test p-value,
    #              2: Levene statistic, 3: Levene p-value
    # shape: n_compares x 4
    stats = np.asarray([(tuple(scipy.stats.ttest_ind(gsamps1, gsamps2)) +
                         tuple(scipy.stats.levene(gsamps1, gsamps2)))
                        for gi, gsamps1 in enumerate(group_samples)
                        for gsamps2 in group_samples[gi + 1:]])

    # Test whether variances differ
    if 'stats' in plots:
        dist_mat = scipy.spatial.distance.squareform(stats[:, 2])
        # Bonferroni-style threshold: 0.05 divided by the number of pairs.
        # NOTE(review): sig_mat stays a 1-D vector while dist_mat is a square
        # matrix -- confirm plot_symmetric_matrix_as_triangle accepts both.
        sig_mat = stats[:, 3] <= (0.05 / stats.shape[0])

        fh3 = plt.figure()
        fh3.suptitle(str(['%.2e' % s for s in stats[:, 3]]))

        ax1 = fh3.add_subplot(1, 2, 1)
        plot_symmetric_matrix_as_triangle(dist_mat, ax=ax1, labels=group_names)

        # 'facecolor' replaces the 'axisbg' keyword removed from matplotlib.
        ax2 = fh3.add_subplot(1, 2, 2, facecolor=fh3.get_facecolor())
        plot_symmetric_matrix_as_triangle(sig_mat, ax=ax2, labels=group_names)

    if show_distributions:
        equalize_xlims(fh2)
        equalize_ylims(fh2)

    return group_names, stats, regressions, group_samples
示例#3
0
文件: grouping.py 项目: lcurley/PING
def _plot_group_hist_pair(xvals, yvals, group_names, colors, bins, title):
    """Draw side-by-side per-group histograms; return (figure, left axes)."""
    fh = plt.figure(figsize=(18, 8))
    # NOTE(review): stacking with np.asarray assumes every group holds the
    # same number of values -- confirm NaN filtering cannot make them ragged.
    ax1 = fh.add_subplot(1, 2, 1)
    plot_normalized_hist(
        np.asarray([xvals[gn] for gn in group_names]).T,
        ax=ax1,
        bins=bins,
        color=colors)
    ax1.legend(group_names)
    ax2 = fh.add_subplot(1, 2, 2)
    plot_normalized_hist(
        np.asarray([yvals[gn] for gn in group_names]).T,
        ax=ax2,
        bins=bins,
        color=colors)
    fh.suptitle(title)
    return fh, ax1


def plot_regressions_scatter(regressions, group_names, measure_names):
    """Plot per-group histograms of regression r-values and p-values.

    Measures are bucketed by the first 12 characters of their name. For
    each bucket one figure is drawn with the r-value histogram on the left
    and the p-value histogram on the right (20 bins, one dataset per
    group). A final figure titled 'Over all measures' pools every bucket
    (50 bins) and overlays, on its left axes, a scatter of r-value against
    30 * p-value coloured by group.

    Parameters
    ----------
    regressions : sequence of 2-D arrays
        One entry per measure, indexed as ``[group, column]``; column 2 is
        read as the r-value and column 3 as the p-value.
        NOTE(review): presumably rows of scipy.stats.linregress output --
        confirm against the producer.
    group_names : sequence of str
        Group names, in the row order used by ``regressions`` entries.
    measure_names : sequence of str
        Measure names, aligned with ``regressions``.
    """
    palette = ['b', 'g', 'r', 'k', 'y', 'c']
    # One colour per group; cycle so more than six groups still get a colour.
    color_arr = [palette[gi % len(palette)]
                 for gi in range(len(group_names))]

    prefixes = np.unique([m[:12] for m in measure_names])
    all_xvals = {gn: [] for gn in group_names}
    all_yvals = {gn: [] for gn in group_names}

    for p in prefixes:
        # Indices of the measures in this bucket (computed once per bucket).
        measure_idx = [mi for mi, m in enumerate(measure_names)
                       if m.startswith(p)]

        p_xvals = {}
        p_yvals = {}
        for gi, group_name in enumerate(group_names):
            group_xvals = np.asarray([regressions[mi][gi, 2]
                                      for mi in measure_idx])
            group_yvals = np.asarray([regressions[mi][gi, 3]
                                      for mi in measure_idx])

            # Keep only measures where both values are defined.
            good_idx = ~np.logical_or(np.isnan(group_xvals),
                                      np.isnan(group_yvals))
            p_xvals[group_name] = group_xvals[good_idx]
            p_yvals[group_name] = group_yvals[good_idx]

            all_xvals[group_name] += group_xvals[good_idx].tolist()
            all_yvals[group_name] += group_yvals[good_idx].tolist()

        _plot_group_hist_pair(p_xvals, p_yvals, group_names, color_arr,
                              bins=20, title=p)

    # Histograms pooled over every bucket.
    _, ax1 = _plot_group_hist_pair(all_xvals, all_yvals, group_names,
                                   color_arr, bins=50,
                                   title='Over all measures')

    # Scatter one group at a time so each group gets exactly one colour;
    # a per-group colour list does not match the flattened point count.
    for group_name, color in zip(group_names, color_arr):
        ax1.scatter(all_xvals[group_name],
                    30 * np.asarray(all_yvals[group_name]),
                    c=color)