示例#1
0
def plot_summary_stat_boxplots_by_exp_groups(
    arr_df, arr_summary_stats, sample_inds=None, fig_path=None, fig_prefix=None, fig_size=(11, 11)
):
    """
    Draw one boxplot figure per summary statistic, with boxes split by experimental group.

    Parameters:
    ----------
    arr_df: pandas.DataFrame
        dataframe of array data. Its "group" column supplies the box labels.
    arr_summary_stats: list
        names of the summary-statistic columns to plot; one figure is created for each.
    sample_inds: [bool array | None (default)]
        row mask selecting the samples that are plotted.
        None (default) selects every row of arr_df.
    fig_path: [string | None (default)]
        string prepended verbatim to every output filename (no separator is added).
        None (default) means no figure is written to disk.
    fig_prefix: [string | None (default)]
        text placed at the start of each figure title and each filename.
        None is treated as the empty string.
    fig_size : 2-tuple, default (11, 11)
        (width, height) of each figure, in inches.
    """
    row_mask = [True] * arr_df.shape[0] if sample_inds is None else sample_inds
    name_prefix = "" if fig_prefix is None else fig_prefix

    for stat_name in arr_summary_stats:
        fig = plt.figure()
        fig.set_size_inches(fig_size)

        # restrict to the requested samples, then box the statistic by group label
        selected = arr_df[row_mask]
        mbp.myboxplot_by_labels(selected[stat_name], selected["group"])
        plt.title(name_prefix + " " + stat_name + "_responses")
        plt.xlabel("group #")

        # nothing is written to disk unless an output path was supplied
        if fig_path is None:
            continue
        fig.set_tight_layout(True)
        fig.savefig(fig_path + name_prefix + stat_name + "_boxplots_by_groups.png", dpi=200)
示例#2
0
def plot_summary_stat_boxplots_by_clusters(
    arr_df, clusters, prot_names, arr_summary_stats, sample_inds=None, fig_prefix=None, fig_path=None, fig_size=(11, 11)
):
    """
    Plot boxplots of summary stats by clusters

    One figure is created for every (protein, summary statistic) pair.

    Parameters:
    ----------
    arr_df: pandas.DataFrame
        dataframe of array data
    clusters: dictionary
        cluster assignment of each datapoint, keyed by protein name
        (one entry for every element of prot_names).
    prot_names: list
        list of strings of protein antigens; used as keys into clusters and
        as part of the figure titles and filenames.
    arr_summary_stats: list
        list of array summary statistics for which boxplots are to be generated.
    sample_inds: [bool array | None]
        boolean array specifying which samples to use. If None (default) will plot all.
    fig_prefix: [string | None (default)]
        text inserted into each filename right after fig_path.
        None (default) is treated as the empty string.
    fig_path: [string | None (default)]
        if set, will save figures in fig_path (prepended verbatim to the filename).
        if None (default) figures are not saved.
    fig_size : 2-tuple of integers default set to (11, 11)
            (width, height) of figure
    """
    if sample_inds is None:
        sample_inds = [True] * arr_df.shape[0]
    # fig_prefix is concatenated into the filename below; leaving it as None
    # would make str.join raise TypeError whenever fig_path is given.
    if fig_prefix is None:
        fig_prefix = ""

    # the row selection does not depend on the protein or the statistic
    selected = arr_df[sample_inds]

    for p in prot_names:
        labels = clusters[p]
        # number of distinct cluster labels; it is encoded in the filename
        num_clusters = len(np.unique(labels))

        for assay in arr_summary_stats:
            f = plt.figure()
            f.set_size_inches(fig_size)
            mbp.myboxplot_by_labels(selected[assay], labels)
            plt.title("".join([p, " clusters ", assay]))
            plt.xlabel("Cluster #")

            # save to file only if an output path was supplied
            if fig_path is not None:
                filename = "".join(
                    [fig_path, fig_prefix, p, "_", assay, "_boxplots_by_clusters_n_", str(num_clusters), ".png"]
                )
                f.savefig(filename, dpi=200)
# Plot the bg_df responses for each protein on its own axis of the
# existing figure `f` / axes array `axarr` (both created outside this fragment).
f.set_tight_layout(True)
f.set_size_inches(18, 11)
for i, p in enumerate(['SHA_ha', 'SHA_na']):
    # x axis: position of each column listed in ind_dict[p]; one line per row of bg_df
    axarr[i].plot(np.arange(len(ind_dict[p])), bg_df[ind_dict[p]].T)
    # bg_df.shape[0] is already the row count (an int); wrapping it in len()
    # raised TypeError, so it is used directly.
    axarr[i].set_title(p + " BSA responses " + str(i+1) + " (n = " + str(bg_df.shape[0]) + ")")
    # axarr[i].set_yticks([])
# `p` still holds the last protein name here, so the file is named after it.
filename = "".join([FIG_PATH, p,  "_BSA_responses.png"])
# NOTE(review): dpi=20 is much lower than the dpi=200 used elsewhere - confirm it is intended.
f.savefig(filename, dpi=20)


# One boxplot figure per (protein, assay) pair: the assay values of the
# post_inds rows of arr_df, boxed by that protein's cluster labels.
for p in ('SHA_ha', 'SHA_na'):
    for assay in assays:
        f = figure()
        f.set_size_inches(18, 11)

        assay_values = arr_df[post_inds][assay]
        mbp.myboxplot_by_labels(assay_values, clusters[p])
        plt.title(p + " clusters " + assay)
        plt.xlabel('Cluster #')

        # num_clusters comes from the enclosing script and is encoded in the file name
        filename = FIG_PATH + p + "_" + assay + "_boxplots_by_clusters_n_" + str(num_clusters) + ".png"
        f.savefig(filename, dpi=200)

# One boxplot figure per (assay or summary statistic, time point) pair,
# with boxes split by the "group" column.
for assay in assays + arr_summary_stats:
    for t in time_dict.keys():
        f = figure()
        f.set_size_inches(18, 11)

        # time_dict[t] selects the rows of arr_df belonging to time point t
        time_rows = arr_df[time_dict[t]]
        mbp.myboxplot_by_labels(time_rows[assay], time_rows['group'])
        plt.title(t + "_" + assay + "_responses")
        plt.xlabel('group #')

        filename = FIG_PATH + t + "_" + assay + "_boxplots_by_groups.png"
        f.savefig(filename, dpi=200)
# Draw and save one dendrogram figure per protein from the linkage data in Z_struct.
for p in ("SHA_ha", "SHA_na"):
    f, axarr = plt.subplots(1)
    f.set_size_inches(18, 11)
    f.set_tight_layout(True)

    # no axes object is passed, so the dendrogram is drawn on the current axes;
    # leaves are labelled with the index of arr_df
    sch.dendrogram(
        Z_struct[p],
        color_threshold=np.inf,
        labels=arr_df.index,
        orientation="left",
    )
    axarr.set_title(p)

    filename = FIG_PATH + p + "_dendrograms.png"
    f.savefig(filename, dpi=200)


# One boxplot figure per (protein, summary statistic) pair: the statistic over
# all rows of arr_df, boxed by that protein's cluster labels.
for p in ("SHA_ha", "SHA_na"):
    for sum_stat in arr_summary_stats:
        f = plt.figure()
        f.set_size_inches(18, 11)

        stat_values = arr_df[sum_stat]
        mbp.myboxplot_by_labels(stat_values, clusters[p])
        plt.title(p + " clusters " + sum_stat)
        plt.xlabel("Cluster #")

        # num_clusters comes from the enclosing script and is encoded in the file name
        filename = FIG_PATH + p + "_" + sum_stat + "_boxplots_by_clusters_n_" + str(num_clusters) + ".png"
        f.savefig(filename, dpi=200)

# # plot boxplots of all groups:
# for assay in assays + arr_summary_stats:
#     for t in time_dict.keys():
#         f = figure()
#         f.set_size_inches(18, 11)
#         mbp.myboxplot_by_labels(arr_df[time_dict[t]][assay], arr_df[time_dict[t]]['group'])
#         plt.title("".join([t, "_", assay, "_responses"]))
#         plt.xlabel('group #')
#         filename = "".join([FIG_PATH, t, "_", assay, "_boxplots_by_groups.png"])
#         f.savefig(filename, dpi=200)
示例#5
0
    diff_df.drop("modified", axis=1, inplace=True)
    print(diff_df)
    diff_df.to_csv(path_or_buf=filename, sep="\t")

# Translate indices into cluster indices - no longer used, since the code was modified to store the tuple of indices as the dataframe index:
# c_inds = np.unravel_index(sig_df_HA.index,(num_clusters, num_clusters-1))


# Report sig_df_HA under a one-line heading (heading and table on separate lines).
print("Vic HA clusters with significant differences in HAI or NT assays", sig_df_HA, sep="\n")

# One boxplot figure per (protein, assay) pair: the assay column of df,
# boxed by that protein's cluster labels.
for p in ("Vic_HA", "Vic_NA"):
    for assay in Vic_assays:
        f = figure()

        # assay_strs maps the assay column name to the string used in titles and file names
        assay_label = assay_strs[assay]
        mbp.myboxplot_by_labels(df[assay], clusters[p])
        plt.title(p + " clusters " + assay_label)
        plt.xlabel("Cluster #")

        filename = FIG_PATH + p + "_" + assay_strs[assay] + "_boxplots_by_clusters_n_" + str(num_clusters) + ".png"
        f.savefig(filename, dpi=200)

# Plot figures for a given clustering solution - currently only performed for the H3N2 victoria strain:
# one figure per protein, with one stacked subplot per cluster.
for p in ["Vic_HA", "Vic_NA"]:

    f, axarr = plt.subplots(num_clusters, 1)
    # plt.subplots squeezes a 1x1 grid down to a bare Axes object; wrap it so
    # that axarr[i] below also works when num_clusters == 1.
    axarr = np.atleast_1d(axarr)
    f.set_tight_layout(True)
    f.set_size_inches(18, 11)

    # both are the same for every cluster of this protein
    x_positions = np.arange(len(ind_dict[p]))
    prot_columns = df[ind_dict[p]]

    # plot clusters: subplot i shows the rows whose cluster label is i + 1 (labels are 1-based)
    for i in np.arange(num_clusters):
        axarr[i].plot(x_positions, prot_columns.loc[clusters[p] == i + 1].T)
# plot boxplots of all groups:


# Drop the four MF59 groups (per the original note, this leaves PBS, Vac, and AS03):
mf59_groups = ['WT_post_MF59', 'Ob_post_MF59', 'WT_pre_MF59', 'Ob_pre_MF59']
curr_df = arr_df[~arr_df.group.isin(mf59_groups)]

# Row masks over curr_df, based on whether the group name contains 'pre' or 'post'.
curr_time_dict = {
    'Pre': curr_df.group.str.contains('pre'),
    'Post': curr_df.group.str.contains('post'),
}


# One log-scaled boxplot figure per summary statistic, boxes split by group,
# saved both as a png and as an eps file.
for assay in arr_summary_stats:
    for t in ['Post']:  # time_dict.keys():
        f, axarr = plt.subplots(1)
        f.set_tight_layout(True)

        # rows of curr_df belonging to time point t
        time_rows = curr_df[curr_time_dict[t]]
        mbp.myboxplot_by_labels(time_rows[assay], time_rows['group'])

        # underscores in the statistic name are shown as spaces in the title
        axarr.set_title(t + " " + assay.replace('_', ' ') + " responses", fontsize=16)
        axarr.tick_params(axis='both', which='major', labelsize=14)
        axarr.set_yscale('log')

        base_name = FIG_PATH + t + "_" + assay + "_boxplots_by_groups"
        for ext, dpi in ((".png", 200), (".eps", 1000)):
            filename = base_name + ext
            f.savefig(filename, dpi=dpi)


# Figures 4, 5 and 6 - clustering dendrograms, median responses and summary stats
# of WT vs. Obese for each group:
for a in ('Vac', 'AS03'):
    # Obese indices first, then WT.
    # NOTE(review): assumes group_inds values are pandas Index-like objects whose
    # append() returns the combined index - confirm against where group_inds is built.
    ob_inds = group_inds['Ob_post_' + a]
    wt_inds = group_inds['WT_post_' + a]
    curr_inds = ob_inds.append(wt_inds)

    amp.plot_clustering_dendrograms(
        Z_struct=Z_struct[a],
        prot_names=['SHA_ha'],
        labels=arr_df.loc[curr_inds].group,
        fig_prefix=a + '_',
        fig_path=FIG_PATH,
    )