示例#1
0
def plot_variance(main_dir='./', plot_dir='./plots/',
                  log=True, genus_only=True):
    """Plot a 100-bin histogram of variances and save it as a PDF.

    The data comes from ``sort_by_variance``; its result must expose a
    ``variance`` column once the index is reset.

    Args:
        main_dir: forwarded to ``sort_by_variance``.
        plot_dir: directory part handed to ``concat_dir_and_filename`` when
            building the output path.
        log: if True, draw the y axis (bin counts) on a log scale.
        genus_only: forwarded to ``sort_by_variance``.

    The figure is written to
    ``distribution_of_sample-wise_variances.pdf`` under ``plot_dir``.
    """
    df = sort_by_variance(main_dir=main_dir, genus_only=genus_only)
    fig, ax = plt.subplots()
    df.reset_index().variance.plot(ax=ax, kind='hist', bins=100)

    # Only advertise a log scale in the labels when one is actually applied;
    # previously the labels claimed "(log scale)" even for log=False.
    scale_note = ' (log scale)' if log else ''
    if log:
        ax.set_yscale('log')

    # Label and save through the explicit figure/axes objects so the result
    # does not depend on which figure pyplot currently considers active.
    ax.set_title('distribution of variances' + scale_note)
    ax.set_xlabel("variance for genera's abundance across all samples")
    ax.set_ylabel('frequency' + scale_note)

    filepath = concat_dir_and_filename(
        plot_dir, 'distribution_of_sample-wise_variances.pdf')
    fig.savefig(filepath)
示例#2
0
def plot_pca_results(top_percent=20, genus_only=False,
                     facet_row=True, uniform_axes=True,
                     main_dir='./', plot_dir='./plots/',
                     savefig=True):
    """Scatter-plot the first two PCA components, coloured by week.

    Runs ``run_pca``, joins the first two columns of the transformed data
    with the sample descriptions from ``colnames_to_sample_info_array``,
    draws them on a seaborn ``FacetGrid`` and optionally saves a PDF.

    Args:
        top_percent: forwarded to ``run_pca``; also embedded in the output
            file name.
        genus_only: forwarded to ``run_pca``; selects the ``genus_only`` or
            ``all_taxa`` tag in the output file name.
        facet_row: if True, facet the grid by the ``oxy`` column (rows) and
            the ``rep`` column (columns).
        uniform_axes: if True, force the x and y axes of every facet to the
            same (min, max) range, rounded outwards to two decimals.
        main_dir: forwarded to ``run_pca`` and
            ``colnames_to_sample_info_array``.
        plot_dir: directory part handed to ``concat_dir_and_filename``.
        savefig: if True, write the figure to the generated PDF path.

    Returns:
        The seaborn ``FacetGrid`` that was drawn, so callers can show or
        further customise the plot (especially when ``savefig`` is False).

    Side effects:
        Changes the global seaborn style and matplotlib rcParams, and prints
        the keyword arguments passed to the grid.
    """
    # get data that's ready for PCA:
    pca_input, data_transformed, variances = \
        run_pca(main_dir=main_dir, top_percent=top_percent,
                genus_only=genus_only)

    # import the sample info needed to map features to sample IDs.
    sample_info = colnames_to_sample_info_array(pca_input, main_dir=main_dir)

    # Axis labels double as dataframe column names. `variances` is rendered
    # as a percentage (presumably the explained-variance ratio of each
    # component -- TODO confirm against run_pca).
    x_axis_label = 'principal component 1 ({0:.0%})'.format(variances[0])
    y_axis_label = 'principal component 2 ({0:.0%})'.format(variances[1])

    # Put together the transformed data and the sample descriptions.
    # NOTE(review): the column-wise concat aligns on index, so this assumes
    # sample_info carries a default 0..n-1 index -- confirm.
    components = pd.DataFrame({x_axis_label: data_transformed[:, 0],
                               y_axis_label: data_transformed[:, 1]})
    plot_data = pd.concat([components, sample_info], axis=1)

    # Custom palette with 14 - 4 + 1 = 11 colours. Per the original note the
    # conditions changed at week ten, giving seven early samples in the
    # original condition and four later samples in the alternative one
    # (presumably weeks 4-14 inclusive -- TODO confirm).
    color_palette = build_color_palette(num_items=14 - 4 + 1,
                                        weeks_before_switch=7)

    # Apply the seaborn style FIRST: sns.set() also installs seaborn's
    # plotting context, which overwrites font-size rcParams. Updating
    # rcParams afterwards is what makes the bigger fonts/ticks stick.
    sns.set(style="ticks")
    mpl.rcParams.update({
        'font.size': 16, 'axes.titlesize': 17, 'axes.labelsize': 15,
        'xtick.labelsize': 10, 'ytick.labelsize': 13,
        'font.weight': 600,
        'axes.labelweight': 600, 'axes.titleweight': 600})

    # No plt.figure() call here: FacetGrid creates its own figure, so a
    # manually created one would just linger as an empty, unused figure.

    # Shared axis limits, rounded outwards to two decimals, computed over
    # both plotted component columns.
    pc_colnames = [x_axis_label, y_axis_label]
    max_value = plot_data[pc_colnames].max(axis=0).max()
    min_value = plot_data[pc_colnames].min(axis=0).min()
    axis_max = math.ceil(max_value * 100) / 100.0
    axis_min = math.floor(min_value * 100) / 100.0

    plot_args = {}
    if facet_row:
        plot_args['row'] = 'oxy'
        plot_args['col'] = 'rep'
    if uniform_axes:
        plot_args['xlim'] = (axis_min, axis_max)
        plot_args['ylim'] = (axis_min, axis_max)

    print(plot_args)

    # Facet size is left at seaborn's defaults (3 units high, aspect 1),
    # which is what the explicit arguments used to request. Not naming the
    # keyword keeps this working across the `size` -> `height` rename.
    g = sns.FacetGrid(plot_data,
                      hue='week', palette=color_palette,
                      **plot_args)
    g.map(plt.scatter, x_axis_label, y_axis_label,
          edgecolor="w", s=60)
    g.add_legend()

    # Build the output path; the suffix encodes the taxonomy level, whether
    # axes are uniform and whether the plot is faceted.
    filename = concat_dir_and_filename(
        plot_dir, 'pca_of_top_{}_percent--'.format(top_percent))
    filename += 'genus_only' if genus_only else 'all_taxa'
    if uniform_axes:
        filename += '_unif_axes_'
    filename += '--faceted.pdf' if facet_row else '.pdf'

    if savefig:
        g.fig.savefig(filename)

    return g