示例#1
0
def plot_pooled_correlation_dists_by_condition(cfg):
    """
    Plot the pooled correlation distribution of each condition.

    For each of the two groups (conditions), the correlation values of
    all matrices of the group are binned together, the pooled histogram
    is normalized to a probability density, and one curve per group is
    drawn. The figure is written to ``cfg["outdata_dir"]`` as
    ``pooledCorrDists.pdf``.

    Parameters
    ----------
    cfg : dict
        the user-specified config dictionary.
        The following keys are required::

            "group_1_mat_fnames"
            "group_2_mat_fnames"
            "group_1_color"
            "group_2_color"
            "group_1_label"
            "group_2_label"
            "outdata_dir"

        ``cfg["blacklist_fname"]`` is also read (it is forwarded to the
        matrix loader), but it is not validated here.

    Returns
    -------
    fig : the matplotlib figure object
    """
    # CFGCHANGE?
    # "outdata_dir" is validated up front so that a missing output
    # directory is reported before any matrix is loaded.
    config.require(cfg, [
        "group_1_mat_fnames", "group_2_mat_fnames", "group_1_color",
        "group_2_color", "group_1_label", "group_2_label", "outdata_dir"
    ])
    fname_group_list = [cfg["group_1_mat_fnames"], cfg["group_2_mat_fnames"]]
    colors = [cfg["group_1_color"], cfg["group_2_color"]]
    labels = [cfg["group_1_label"], cfg["group_2_label"]]

    n_bins = 100
    corr_bins, corr_bin_centers = aux.get_lin_bins(n_bins, -1, 1.)
    # The width of the first bin is used for all bins in the normalization.
    bin_width = corr_bins[1] - corr_bins[0]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for fname_group, color, label in zip(fname_group_list, colors, labels):
        # Accumulate the histogram over every matrix of this group.
        pooled_counts = np.zeros(n_bins)
        for fname in fname_group:
            flat_corr_mat = \
                dataio.get_blacklist_filtered_and_flattened_adj_mat(
                    fname, cfg["blacklist_fname"]
                )
            pooled_counts += aux.get_bin_counts(flat_corr_mat, corr_bins)
        # Normalize so that sum(density * bin_width) == 1.
        density = pooled_counts / np.sum(pooled_counts * bin_width)
        ax.plot(corr_bin_centers, density, color=color, label=label)

    ax.set_xlabel(settings.get_prop_tex_name(settings.correlation_tag))
    ax.set_ylabel(r"Probability density P(c)")
    ax.legend(loc=0)
    fig.savefig(cfg['outdata_dir'] + "pooledCorrDists.pdf",
                format="pdf",
                bbox_inches='tight')
    return fig
示例#2
0
def plot_pooled_corr_t_val_dists(cfg):
    """
    Plot the distribution of the t-values computed between the two groups.

    The matrices listed in ``cfg["all_fnames"]`` are loaded, t-values are
    computed with either the paired or the unpaired variant depending on
    ``cfg["paired"]``, and their histogram (100 bins, normalized to a
    probability density) is drawn as a single curve. The figure is
    written to ``cfg["outdata_dir"]`` as ``tvalDist.pdf``.

    Parameters
    ----------
    cfg : dict
        the user-specified config dictionary.
        The following keys are required::

            "group_1_mat_fnames"
            "group_2_mat_fnames"
            "group_1_color"
            "group_2_color"
            "group_1_label"
            "group_2_label"
            "outdata_dir"
            "paired"

        ``cfg["all_fnames"]`` and ``cfg["blacklist_fname"]`` are also
        read (they are forwarded to the matrix loader), but they are not
        validated here.

    Returns
    -------
    fig : the matplotlib figure object
    """
    config.require(cfg, [
        "group_1_mat_fnames", "group_2_mat_fnames", "group_1_color",
        "group_2_color", "group_1_label", "group_2_label", "outdata_dir",
        "paired"
    ])
    # get tvals
    flat_mats = dataio.get_blacklist_filtered_and_flattened_adj_mats(
        cfg["all_fnames"], cfg["blacklist_fname"])
    # The size of the first group is passed to the t-value helpers.
    n_group_1 = len(cfg['group_1_mat_fnames'])
    if cfg["paired"]:
        t_vals = measures.paired_t_value(flat_mats, n_group_1)
    else:
        t_vals = measures.unpaired_t_value(flat_mats, n_group_1)

    # Pad the binning range by 10 % of the extreme values on both sides.
    min_val = np.min(t_vals)
    max_val = np.max(t_vals)
    n_bins = 100
    bins, bin_centers = aux.get_lin_bins(n_bins,
                                         min_val - (np.abs(min_val) * 0.1),
                                         max_val + (np.abs(max_val) * 0.1))
    bin_counts = aux.get_bin_counts(t_vals, bins)
    # normalize: sum(density * bin_width) == 1
    bin_counts = bin_counts * 1. / (np.sum(bin_counts) * (bins[1] - bins[0]))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(bin_centers,
            bin_counts,
            label=cfg['group_1_label'] + "-" + cfg["group_2_label"])
    ax.set_xlabel(settings.get_prop_tex_name(settings.tval_tag))
    ax.set_ylabel(r"Probability density")
    ax.legend(loc=0)
    # Save through the figure object (not the implicit "current figure")
    # so the output does not depend on pyplot's global state.
    fig.savefig(cfg["outdata_dir"] + "tvalDist.pdf",
                format="pdf",
                bbox_inches='tight')
    return fig
示例#3
0
def plot_individual_correlation_dists(cfg):
    """
    Plot the correlation distribution of every individual matrix.

    One normalized histogram curve is drawn per matrix file, colored by
    the group the file belongs to, so that the variation between
    individuals can be seen. The figure is written to
    ``cfg["outdata_dir"]`` as ``individualCorrDists.pdf``.

    Parameters
    ----------
    cfg : dict
        the user-specified config dictionary.
        The following keys are required::

            "group_1_mat_fnames"
            "group_2_mat_fnames"
            "group_1_color"
            "group_2_color"
            "group_1_label"
            "group_2_label"
            "outdata_dir"

        ``cfg["blacklist_fname"]`` is also read (it is forwarded to the
        matrix loader), but it is not validated here.

    Returns
    -------
    fig : the matplotlib figure object
    """
    # CFGCHANGE?
    # The key list was previously missing from this call.
    config.require(cfg, [
        "group_1_mat_fnames", "group_2_mat_fnames", "group_1_color",
        "group_2_color", "group_1_label", "group_2_label", "outdata_dir"
    ])
    fname_group_list = [cfg["group_1_mat_fnames"], cfg["group_2_mat_fnames"]]
    colors = [cfg["group_1_color"], cfg["group_2_color"]]
    labels = [cfg["group_1_label"], cfg["group_2_label"]]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    n_bins = 100
    corr_bins, corr_bin_centers = aux.get_lin_bins(n_bins, -1, 1.)
    # The width of the first bin is used for all bins in the normalization.
    bin_width = corr_bins[1] - corr_bins[0]

    for fname_group, color, label in zip(fname_group_list, colors, labels):
        for k, fname in enumerate(fname_group):
            flat_corr_mat = \
                dataio.get_blacklist_filtered_and_flattened_adj_mat(
                    fname, cfg['blacklist_fname']
                )
            bin_counts = aux.get_bin_counts(flat_corr_mat, corr_bins)
            # Normalize so that sum(density * bin_width) == 1.
            density = bin_counts / np.sum(bin_counts * bin_width)
            # Only the first curve of a group carries the group label;
            # labels starting with an underscore are left out of the
            # legend, which avoids one duplicate entry per file.
            ax.plot(corr_bin_centers,
                    density,
                    color=color,
                    label=(label if k == 0 else "_nolegend_"))

    ax.set_xlabel(settings.get_prop_tex_name("corr"))
    ax.set_ylabel(r"Probability density P(c)")
    ax.legend(loc=0)
    fig.savefig(cfg['outdata_dir'] + "individualCorrDists.pdf",
                format="pdf",
                bbox_inches='tight')
    return fig
示例#4
0
def plotSameLinksShareVsLouvainSimilarityMeasures(cfg,
                                                  filenamesGroup1,
                                                  filenamesGroup2,
                                                  density=None):
    """
    Scatter-plot each clustering similarity measure against the share of
    common links.

    For every similarity matrix returned by
    ``gencomps.computeClusterSimilarityMeasures`` the strictly upper
    triangular entries are plotted against the corresponding entries of
    the common-links matrix (divided by its ``[0, 0]`` element). The
    Pearson correlation of the two is shown in the figure title and each
    figure is saved to ``cfg["outdata_dir"]`` as
    ``<common_links_tag>_vs_<measure>.pdf``.

    Parameters
    ----------
    cfg : dict
        the user-specified config dictionary; ``cfg["outdata_dir"]`` is
        read here and ``cfg`` is forwarded to ``fnc.get_fname``.
    filenamesGroup1, filenamesGroup2 : list
        file names of the two groups; they are concatenated with ``+``.
    density : number, optional
        the network density whose common-links matrix should be used.
        If None, or if no stored density matches, the first stored
        matrix is used.

    Returns
    -------
    None
    """
    allFNames = filenamesGroup1 + filenamesGroup2
    data = dataio.mergeAndLoadLouvainProperties(allFNames, density)
    clusterings = data[settings.louvain_cluster_tag]
    simMatricesDict = gencomps.computeClusterSimilarityMeasures(clusterings)

    linkSimMatData = dataio.load_pickle(
        fnc.get_fname(cfg, settings.common_links_tag))
    densities = linkSimMatData[settings.densities_tag]

    # Locate the stored density matching the requested one. The previous
    # code compared densities[index] (always element 0) instead of the
    # loop element, so the search could never select another entry.
    # NOTE(review): both sides are truncated with int() as before; if the
    # densities are stored as fractions below 1 they all truncate to 0 --
    # confirm the units used by the callers.
    index = 0
    if density is not None:
        for j, candidate in enumerate(densities):
            if int(candidate) == int(density):
                index = j
                break
    linkSimMatrix = linkSimMatData[settings.common_links_tag][index]
    linkSimMatrix = linkSimMatrix / float(linkSimMatrix[0, 0])  # get to ratio

    # Strict upper triangle: every unordered pair once, no diagonal.
    triu_indices = np.triu_indices_from(linkSimMatrix, 1)
    linkSims = linkSimMatrix[triu_indices]
    for measure, simMatrix in simMatricesDict.items():
        simMeasures = simMatrix[triu_indices]

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(linkSims, simMeasures, "o")
        ax.set_ylabel(settings.get_prop_tex_name(measure))
        ax.set_xlabel(settings.get_prop_tex_name(settings.common_links_tag))
        (r, p) = pearsonr(linkSims, simMeasures)
        fig.suptitle(r"Pearson correlation: {:.3f}".format(r))
        fig.savefig(cfg["outdata_dir"] + settings.common_links_tag + "_vs_" +
                    measure + ".pdf",
                    format="pdf",
                    bbox_inches='tight')
示例#5
0
def plot_sim_mat(ax, m, measure, vmin=None, vmax=None):
    """
    Plot a similarity matrix in to the given axis "ax".

    Parameters
    ----------
    ax : matplotlib axes
        the axes to draw into
    m : numpy array
        the matrix to be shown as an image
    measure : str
        name of the measure; selects the colormap and the way the
        default color limits are derived:

        - ``'vi'``: colormap ``hot``; default ``vmin`` is the second
          smallest distinct value of ``m`` (the smallest one if there is
          only one distinct value), default ``vmax`` is the maximum.
        - ``'nmi'``, ``'adjusted_rand'``, ``settings.common_links_tag``:
          colormap ``hot_r``; default ``vmax`` is the largest value lying
          relatively more than 1e-4 below the maximum, default ``vmin``
          is the minimum.
        - anything else: a notice is printed, colormap ``hot_r`` and the
          full data range are used.
    vmin, vmax : number, optional
        explicit color limits; when given they override the defaults.

    Returns
    -------
    None
    """
    sortedvals = np.sort(np.unique(m.flatten()))
    if measure in ['vi']:
        cmap = cm.hot
        if vmax is None:
            vmax = np.max(m)
        if vmin is None:
            # Skip the smallest distinct value when there is another one
            # (presumably the zero self-distance on the diagonal --
            # TODO confirm).
            vmin = sortedvals[1] if len(sortedvals) > 1 else sortedvals[0]
    elif measure in ['nmi', 'adjusted_rand', settings.common_links_tag]:
        cmap = cm.hot_r
        if vmax is None:
            # Walk down from the second largest distinct value until one
            # differs from the maximum by more than a relative 1e-4.
            # With fewer than three distinct values the loop body never
            # runs and vmax stays None, which is then passed on to
            # imshow as-is.
            vmax_start = sortedvals[-1]
            for i in range(2, len(sortedvals)):
                vmax = sortedvals[-i]
                if (vmax_start - vmax) / vmax_start > 0.0001:
                    break
        if vmin is None:
            vmin = np.min(m)
    else:
        # Single parenthesized string: prints identically under
        # Python 2 and Python 3.
        print("trying to plot unknown measure in function plot_sim_mat..." +
              "defaulting the colormap hot_r")
        cmap = cm.hot_r
        if vmin is None:
            vmin = np.min(m)
        if vmax is None:
            vmax = np.max(m)

    im = ax.imshow(m, interpolation='nearest', cmap=cmap, vmax=vmax, vmin=vmin)
    cbar = plt.colorbar(im, ax=ax, orientation='horizontal')
    cbar.set_label(settings.get_prop_tex_name(measure))

    # One tick per row/column, labelled 1..len(m). Positions are fixed
    # before the labels are assigned so labels and ticks stay aligned.
    ticks = np.arange(0, len(m))
    ax.yaxis.set_ticks(ticks)
    ax.yaxis.set_ticklabels(ticks + 1)
    ax.xaxis.set_ticks(ticks)
    ax.xaxis.set_ticklabels(ticks + 1)

    ax.xaxis.set_ticks_position('top')
    ax.tick_params(length=0, width=0, colors='k')
    # Draw a cross through the middle of the image.
    ymed = np.average(ax.get_ylim())
    xmed = np.average(ax.get_xlim())
    ax.axhline(y=ymed, xmin=0, xmax=1, color='0.0', lw=1.0, ls="-")
    ax.axvline(x=xmed, ymin=0, ymax=1, color="0.0", lw=1.0, ls="-")
示例#6
0
def plotDensityVsAvgPairedShare(cfg):
    """
    Plots the avg. fraction of the common links between the two settings
    (paired setup)

    For every stored density the common-links matrix is divided by its
    ``[0, 0]`` element and the entries ``[i, n + i]`` (``i`` in
    ``0..n-1``, ``n`` being half of the matrix size) are taken as the
    paired values. Each pair is drawn as a faint red curve over the
    densities, and the mean with the standard deviation as error bars is
    drawn in blue. The x axis is logarithmic.

    Parameters
    ----------
    cfg : dict
        the user-specified config dictionary; ``cfg["outdata_dir"]`` and
        ``cfg["include_mst"]`` are read here, and ``cfg`` is forwarded to
        ``fnc.get_fname``.

    Returns
    -------
    None
    """
    data = dataio.load_pickle(fnc.get_fname(cfg, settings.common_links_tag))
    linkSimMatrices = data[settings.common_links_tag]
    # Floor division keeps n an integer on both Python 2 and Python 3.
    n = len(linkSimMatrices[0]) // 2
    densities = np.array(data[settings.densities_tag])

    rows = np.arange(n)
    vals = []
    linkSimAvgs = []
    linkSimStds = []
    for linkSimMatrix in linkSimMatrices:
        linkSimMatrix = linkSimMatrix / \
            float(linkSimMatrix[0, 0])  # get to ratio
        # Entry (i, n + i) pairs item i of the first half with item i of
        # the second half.
        paired_vals = linkSimMatrix[rows, rows + n]
        vals.append(paired_vals)
        linkSimAvgs.append(np.average(paired_vals))
        linkSimStds.append(np.std(paired_vals))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.set_xlabel(settings.get_prop_tex_name(settings.densities_tag))
    ax.set_ylabel(r"Share of common links")
    # Transpose so that each row is one pair followed over the densities.
    for i, val in enumerate(np.array(vals).T):
        ax.plot(densities, val, "r", alpha=0.4, label=str(i + 1))

    ax.errorbar(densities, linkSimAvgs, linkSimStds, color="b", label="avg")
    ax.set_xscale('log')
    legend = ax.legend(loc=1)
    legend.get_frame().set_alpha(0.75)

    # The two output names differ only in the "_with_mst" marker.
    suffix = "_with_mst.pdf" if cfg['include_mst'] else ".pdf"
    fig.savefig(cfg["outdata_dir"] + settings.common_links_tag +
                "_paired_vs_" + settings.densities_tag + suffix,
                format="pdf",
                bbox_inches="tight")
示例#7
0
def _plot_comparison_and_p_value(ax,
                                 densities,
                                 means0,
                                 means1,
                                 confInt0,
                                 confInt1,
                                 color0,
                                 color1,
                                 pVals=None,
                                 measure_key="",
                                 paired=False,
                                 n=-1,
                                 labels=None,
                                 xscale="linear",
                                 legendloc=0,
                                 significanceTresholdLine=None):
    """
    Draw two group means with shaded bands and the p-values on a twin axis.

    Parameters
    ----------
    ax : matplotlib axes
        axes receiving the mean curves and the shaded bands
    densities : sequence
        x values shared by all curves
    means0, means1 : sequence
        y values of the two mean curves
    confInt0, confInt1 : sequence
        element 0 and element 1 are used as the two bounds of the shaded
        band of the respective group
    color0, color1 : matplotlib color
        colors of the two groups
    pVals : sequence
        p-values drawn on a logarithmic right-hand axis. Although the
        default is None, the value is passed to ``semilogy``
        unconditionally.
    measure_key : str
        key handed to ``settings.get_prop_tex_name`` for the y label
    paired : bool
        if True, a horizontal line is drawn at ``2**-(n - 1)``
    n : int
        used only for the ``paired`` line
    labels : sequence
        labels of the two mean curves. Although the default is None,
        ``labels[0]`` and ``labels[1]`` are indexed unconditionally.
    xscale : str
        x scale applied to both axes; with ``"log"`` the x limits are set
        to the range of ``densities``
    legendloc : int or str
        legend location
    significanceTresholdLine : number, optional
        if given, a dashed horizontal line is drawn at this p-value

    Returns
    -------
    ax, pax : the original axes and the twin axes holding the p-values
    """
    ax.set_xlabel(settings.get_prop_tex_name(settings.densities_tag))
    ax.set_ylabel(settings.get_prop_tex_name(measure_key))

    # Mean curves first, then the shaded bands (same order for both groups).
    legend_lines = []
    for means, color, idx in ((means0, color0, 0), (means1, color1, 1)):
        legend_lines += ax.plot(densities,
                                means,
                                label=labels[idx],
                                color=color,
                                lw=1.5)
    band_alpha = 0.25
    for band, color in ((confInt0, color0), (confInt1, color1)):
        ax.fill_between(densities,
                        band[0],
                        band[1],
                        color=color,
                        alpha=band_alpha)

    # The p-values live on a second y axis sharing the x axis.
    pax = ax.twinx()
    if significanceTresholdLine is not None:
        pax.axhline(significanceTresholdLine,
                    xmin=0,
                    xmax=1,
                    color="0.4",
                    lw=1.0,
                    ls="--",
                    zorder=-1000)
    if paired:
        pax.axhline(y=2**-(n - 1),
                    xmin=0,
                    xmax=1,
                    color='0.6',
                    lw=1.5,
                    ls="-",
                    zorder=-1000)
    legend_lines += pax.semilogy(densities,
                                 pVals,
                                 "-o",
                                 color="0.25",
                                 markersize=2.5,
                                 label=r"$p$-value",
                                 zorder=-10)
    # to take the left side log scale off (a bug in matplotlib with loglog)
    pax.yaxis.tick_right()
    pax.set_ylim((10**-4, 10**0))

    pax.set_xscale(xscale)
    ax.set_xscale(xscale)
    if xscale == "log":
        x_range = (np.min(densities), np.max(densities))
        ax.set_xlim(x_range)
        pax.set_xlim(x_range)

    # One legend on the twin axes covering the curves of both axes.
    legend_labels = [line.get_label() for line in legend_lines]
    legend = pax.legend(legend_lines,
                        legend_labels,
                        loc=legendloc,
                        numpoints=1,
                        handlelength=1)
    legend.get_frame().set_alpha(0.5)
    return ax, pax
示例#8
0
def plot_link_dist_probs_by_condition(cfg):
    """
    Plots the link distance distributions pooled by condition.

    For every density stored with the link-distance data, the distances
    of all files of a group are concatenated and drawn with
    ``genplots.plot_inv_cdf`` (y label "1-CDF(d)", logarithmic y scale),
    one curve per group. One figure per density is written to
    ``cfg["outdata_dir"]`` as ``linkDistProbs_<density>.pdf`` and closed
    afterwards.

    Parameters
    ----------
    cfg : dict
        the user-specified config dictionary.
        The following keys are required::

            "group_1_mat_fnames"
            "group_2_mat_fnames"
            "group_1_color"
            "group_2_color"
            "group_1_label"
            "group_2_label"
            "outdata_dir"
            "density"

    Returns
    -------
    None
    """
    # CFGCHANGE?
    config.require(cfg, [
        "group_1_mat_fnames", "group_2_mat_fnames", "group_1_color",
        "group_2_color", "group_1_label", "group_2_label", "outdata_dir",
        "density"
    ])
    fname_group_list = [cfg["group_1_mat_fnames"], cfg["group_2_mat_fnames"]]
    colors = [cfg["group_1_color"], cfg["group_2_color"]]
    labels = [cfg["group_1_label"], cfg["group_2_label"]]

    # The list of densities is read from the first file of the first
    # group and then used for every other file as well.
    data_example = dataio.load_pickle(
        fnc.get_ind_fname(fname_group_list[0][0], cfg,
                          settings.link_distance_tag))
    densities = data_example[settings.config_tag][settings.densities_tag]

    # group_distances[i][j]: all link distances of group i at density j.
    group_distances = []
    for fname_group in fname_group_list:
        distances = [np.array([]) for _ in densities]
        for fname in fname_group:
            print(fname)
            data = dataio.load_pickle(
                fnc.get_ind_fname(fname, cfg, settings.link_distance_tag))
            p_f_dists = data[settings.link_distance_tag]
            for j in range(len(densities)):
                distances[j] = np.hstack((distances[j], p_f_dists[j]))
        group_distances.append(distances)

    print(densities)
    for j, p in enumerate(densities):
        fig = plt.figure(figsize=(4, 3))
        ax = fig.add_subplot(1, 1, 1)
        ax.set_xlabel(settings.get_prop_tex_name(settings.link_distance_tag))
        ax.set_ylabel(r"1-CDF(d)")
        # Formatted as one string so the output is the same on
        # Python 2 and Python 3.
        print("%s %s" % (p, j))
        # Annotate the density as a percentage (escaped "%" for TeX).
        ax.text(.5,
                0.10,
                r"$\rho$ = " + str(p * 100) + r"\%",
                ha='center',
                va='center',
                transform=ax.transAxes)
        for distances, label, color in zip(group_distances, labels, colors):
            genplots.plot_inv_cdf(ax,
                                  distances[j],
                                  label=label,
                                  color=color,
                                  yscale='log')
        fig.tight_layout()
        fig.savefig(cfg['outdata_dir'] + "linkDistProbs_" + str(p) + ".pdf",
                    format="pdf",
                    bbox_inches="tight")
        # Close each figure so open figures do not pile up across densities.
        plt.close(fig)
    return None