def pca_plot(pca_data: pd.DataFrame, dim1: str, dim2: str, dim3: str):
    """ 
    Returns plot displaying 3 PCA variables (including color). 
    Parameters
    ----------
    pca: Fitted pca object to plot. 
    df: Dataframe pca was fit on. Used for column names. 
    dim1: String of column name of principal component to plot on x-axis. 
    dim2: String of column name of principal component to plot on y-axis.
    dim3: String of column name of principal component to plot as colour.

    Returns
    ----------
    Plot of PCA with dim1 on x-axis, dim2 on y-axis, and coloured by dim3


    """
    #Set plot theme within function: 
    p9.theme_set(p9.theme_classic())

    num_components = len(pca_data.columns) - 1
    color_type = type(pca_data.loc[0, dim3])
    p = (p9.ggplot(pca_data, p9.aes(x=dim1, y=dim2, fill=dim3))
         + p9.geom_point())
    if color_type == str:
        print('color type is qualitative')
        # A better qualitative colour palette has not been chosen yet:
        # p = p + p9.scale_fill_brewer(type="qual", palette='Accent')
    return p
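
# A minimal usage sketch, not part of the original snippet: fit a PCA with
# scikit-learn, assemble a DataFrame of components plus a qualitative label
# column, and plot two components coloured by the label. The dataset and the
# column names ('PC1', 'PC2', 'PC3', 'label') are hypothetical.
import pandas as pd
import plotnine as p9
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

iris = load_iris(as_frame=True)
pca = PCA(n_components=3).fit(iris.data)
pca_data = pd.DataFrame(pca.transform(iris.data), columns=['PC1', 'PC2', 'PC3'])
pca_data['label'] = iris.target.astype(str)  # string labels -> qualitative colour

print(pca_plot(pca_data, 'PC1', 'PC2', 'label'))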
Example #2
def derplot(adata=None,
            filename='derplot',
            embedding='tsne',
            feature='sample_type_tech',
            size=(12, 12),
            save=False,
            draw=False,
            psize=1):
    start = datetime.datetime.now()
    p.options.figure_size = size
    savename = filename + '.' + embedding + '.' + feature + '.derplot.png'
    print(
        start.strftime("%H:%M:%S"),
        'Starting ... \t',
        savename,
    )
    p.theme_set(p.theme_classic())
    pt = (
        p.ggplot(p.aes(embedding + '0', embedding + '1', color=feature), adata.obs)
        + p.geom_point(size=psize, alpha=1, stroke=0)
        + p.guides(color=p.guide_legend(override_aes={'size': 15}))
    )

    if save: pt.save(savename, format='png', dpi=200)
    end = datetime.datetime.now()
    delta = end - start
    print(start.strftime("%H:%M:%S"), str(int(delta.total_seconds())),
          's to make: \t', savename)
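
# A minimal, hypothetical call, not part of the original snippet. It assumes
# plotnine is imported as `p`, `datetime` is imported, and `adata` is an
# AnnData object whose .obs frame contains 'tsne0'/'tsne1' coordinates and a
# 'sample_type_tech' column (e.g. produced by a scanpy workflow).
import datetime
import plotnine as p
import scanpy as sc

adata = sc.read_h5ad('clustered.h5ad')  # hypothetical input file
derplot(adata, filename='demo', embedding='tsne',
        feature='sample_type_tech', save=True)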
def plot_pca_vis(pca: PCA, df: pd.DataFrame, pc_x: int = 0, pc_y: int = 1, num_dims: int = 5) -> plt:

    """
    Plot contribution of different dimensions to principal components. 
    
    Parameters
    ----------
    pca: Fitted pca object to plot. 
    df: Dataframe pca was fit on. Used for column names. 
    pc_x: Index of principal component to plot on x-axis. 
    pc_y: Index of principal component to plot on y-axis. 
    num_dims: Number of contributing elements to include for each axis. 

    Returns
    ----------
    The matplotlib.pyplot module, so the caller can show or save the figure.

    https://stackoverflow.com/questions/45148539/project-variables-in-pca-plot-in-python
    Adapted into function by Tim Cashion
    """
    #Set plot theme within function: 
    p9.theme_set(p9.theme_classic())

    # Get the PCA components (loadings)
    PCs = pca.components_
    
    PC_x_index = PCs[pc_x, : ].argsort()[-num_dims:][::-1]
    PC_y_index = PCs[pc_y, : ].argsort()[-num_dims:][::-1]
    combined_index = set(list(PC_x_index) + list(PC_y_index))
    combined_index = sorted(combined_index)
    PCs = PCs[:, combined_index]
    # Use quiver to generate the basic plot
    fig = plt.figure(figsize=(5,5))
    plt.quiver(np.zeros(PCs.shape[1]), np.zeros(PCs.shape[1]),
            PCs[pc_x,:], PCs[pc_y,:], 
            angles='xy', scale_units='xy', scale=1)

    # Label each arrow with its feature name (taken from the df columns)
    feature_names = df.columns[combined_index]
    for y, x, name in zip(PCs[pc_y, :] + 0.02, PCs[pc_x, :] + 0.02, feature_names):
        plt.text(x, y, name, ha='center', va='center')

    # Add unit circle
    circle = plt.Circle((0,0), 1, facecolor='none', edgecolor='b')
    plt.gca().add_artist(circle)

    # Ensure correct aspect ratio and axis limits
    plt.axis('equal')
    plt.xlim([-1.0,1.0])
    plt.ylim([-1.0,1.0])

    # Label axes
    plt.xlabel('PC ' + str(pc_x))
    plt.ylabel('PC ' + str(pc_y))
    
    plt.tight_layout()
    return plt
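
# A minimal usage sketch, not part of the original snippet: fit a PCA on a
# numeric DataFrame and draw the loading vectors of the first two components.
# Assumes numpy, matplotlib.pyplot, and plotnine are imported as np, plt, and
# p9, as the function above requires.
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA

X = load_wine(as_frame=True).data
pca = PCA().fit(X)
plot_pca_vis(pca, X, pc_x=0, pc_y=1, num_dims=5)
plt.show()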
Example #4
    def plot_fusion(self):
        """
        plot fusion count
        """

        p9.theme_set(p9.theme_void())
        for ref in self.pos_dict:
            if ref in self.df_tsne.columns:
                out_plot_file = f'{self.out_prefix}_{ref}_fusion.pdf'
                plot = p9.ggplot(self.df_tsne, p9.aes(x="tSNE_1", y="tSNE_2", color=ref)) + \
                    p9.geom_point(size=0.2) + \
                    p9.theme_bw() + \
                    p9.scale_color_gradient(low="lightgrey",high="blue")
                plot.save(out_plot_file)
Example #5
import argparse
import importlib
import collections
import os
import sys

import numpy as np
import pandas as pd
import plotnine as gg
import base.plot as bp

from base import config_lib

sys.path.append(os.getcwd())
gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))

# FIGURE_OPTIONS holds the details needed to reproduce each figure: the config,
# the number of jobs, and the plot function.
# Figures are named with reference to "A Tutorial on Thompson Sampling":
#     https://arxiv.org/abs/1707.02038.

FigureOptions = collections.namedtuple(
    'FigureOptions', ['fig_name', 'config', 'paper_n_jobs', 'plot_fun'])

FIGURE_OPTIONS = collections.OrderedDict([
    [
        '3',
        FigureOptions(fig_name='3',
                      config='finite_arm.config_simple',
                      paper_n_jobs=20000,
Example #6
    job_config = config_lib.get_job_config(config, job_id)
    experiment = job_config['experiment']
    experiment.run_experiment()
    results.append(experiment.results)

#############################################################################
# Collating data with Pandas
params_df = config_lib.get_params_df(config)
df = pd.merge(pd.concat(results), params_df, on='unique_id')
plt_df = (df.groupby(['agent', 't']).agg({
    'instant_regret': np.mean
}).reset_index())

#############################################################################
# Plotting and analysis (uses plotnine by default)
"""
gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
gg.theme_update(figure_size=(12, 8))

p = (gg.ggplot(plt_df)
     + gg.aes('t', 'instant_regret', colour='agent')
     + gg.geom_line())
print(p)
"""
"""
plt_df_cum_regret = (df.groupby(['agent', 't'])
          .agg({'cum_regret': np.mean})
          .reset_index())

q = (gg.ggplot(plt_df_cum_regret)
     + gg.aes('t', 'cum_regret', colour='agent')
Example #7
def make_plots():
    # Setup plotting
    import pandas as pd
    import plotnine as gg
    import warnings

    pd.options.mode.chained_assignment = None
    gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
    gg.theme_update(figure_size=(12, 8),
                    panel_spacing_x=0.5,
                    panel_spacing_y=0.5)
    warnings.filterwarnings('ignore')

    # Load Results
    experiments = {
        'Random': RANDOM_RESULTS_PATH,
        'TRPO': TRPO_RESULTS_PATH,
    }
    data_frame, sweep_vars = csv_load.load_bsuite(experiments)
    bsuite_score = summary_analysis.bsuite_score(data_frame, sweep_vars)
    bsuite_summary = summary_analysis.ave_score_by_tag(bsuite_score,
                                                       sweep_vars)

    # Generate general plots
    radar_fig = summary_analysis.bsuite_radar_plot(bsuite_summary, sweep_vars)
    radar_fig.savefig(PLOTS_PATH + 'radar_fig.png', bbox_inches='tight')
    bar_fig = summary_analysis.bsuite_bar_plot(bsuite_score, sweep_vars)
    bar_fig.save(PLOTS_PATH + 'bar_fig.png')
    compare_bar_fig = summary_analysis.bsuite_bar_plot_compare(
        bsuite_score, sweep_vars)
    compare_bar_fig.save(PLOTS_PATH + 'compare_bar_fig.png')

    # Generate specific analyses
    # Learning performance
    from bsuite.experiments.bandit import analysis as bandit_analysis
    bandit_df = data_frame[data_frame.bsuite_env == 'bandit'].copy()
    bandit_scores = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit', sweep_vars)
    bandit_scores.save(PLOTS_PATH + 'bandits_scores.png')
    bandit_convergence = bandit_analysis.plot_learning(bandit_df, sweep_vars)
    bandit_convergence.save(PLOTS_PATH + 'bandits_convergence.png')
    bandit_seeds = bandit_analysis.plot_seeds(bandit_df, sweep_vars)
    bandit_seeds.save(PLOTS_PATH + 'bandits_seeds.png')

    # Robustness to noise
    from bsuite.experiments.bandit_noise import analysis as bandit_noise_analysis
    bandit_noise_df = data_frame[data_frame.bsuite_env ==
                                 'bandit_noise'].copy()
    bandit_noise_overall = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit_noise', sweep_vars)
    bandit_noise_overall.save(PLOTS_PATH + 'bandits_noise_overall.png')
    bandit_noise_avg = bandit_noise_analysis.plot_average(
        bandit_noise_df, sweep_vars)
    bandit_noise_avg.save(PLOTS_PATH + 'bandits_noise_avg.png')
    bandit_noise_regret = bandit_noise_analysis.plot_learning(
        bandit_noise_df, sweep_vars)
    bandit_noise_regret.save(PLOTS_PATH + 'bandits_noise_regret.png')

    # Robustness to reward scaling
    from bsuite.experiments.bandit_scale import analysis as bandit_scale_analysis
    bandit_scale_df = data_frame[data_frame.bsuite_env ==
                                 'bandit_scale'].copy()
    bandit_scale_overall = summary_analysis.plot_single_experiment(
        bsuite_score, 'bandit_scale', sweep_vars)
    bandit_scale_overall.save(PLOTS_PATH + 'bandits_scale_overall.png')
    bandit_scale_avg = bandit_scale_analysis.plot_average(
        bandit_scale_df, sweep_vars)
    bandit_scale_avg.save(PLOTS_PATH + 'bandits_scale_avg.png')
    bandit_scale_learn = bandit_scale_analysis.plot_learning(
        bandit_scale_df, sweep_vars)
    bandit_scale_learn.save(PLOTS_PATH + 'bandits_scale_learn.png')
    """
Example #8
    def batch_plots(self):

        # First, put together active leak data and output for live plotting functionality
        # (no AL plot here currently)
        dfs = self.active_leak_dfs

        for i in range(len(dfs)):
            n_cols = dfs[i].shape[1]
            dfs[i]['mean'] = dfs[i].iloc[:, 0:n_cols].mean(axis=1)
            dfs[i]['std'] = dfs[i].iloc[:, 0:n_cols].std(axis=1)
            dfs[i]['low'] = dfs[i].iloc[:, 0:n_cols].quantile(0.025, axis=1)
            dfs[i]['high'] = dfs[i].iloc[:, 0:n_cols].quantile(0.975, axis=1)
            dfs[i]['program'] = self.directories[i]

        # Move reference program to the top of the list
        for i, df in enumerate(dfs):
            if df['program'].iloc[0] == self.ref_program:
                dfs.insert(0, dfs.pop(i))

        # Arrange dfs for plot 1
        dfs_p1 = dfs.copy()
        for i in range(len(dfs_p1)):
            # Reshape
            dfs_p1[i] = pd.melt(dfs_p1[i], id_vars=['datetime', 'mean',
                                                    'std', 'low', 'high', 'program'])

        # Combine dataframes into single dataframe for plotting
        # DataFrame.append was removed in pandas 2.x; pd.concat is the equivalent
        df_p1 = pd.concat(dfs_p1, ignore_index=True)

        # Output Emissions df for other uses (e.g. live plot)
        df_p1.to_csv(self.output_directory + 'mean_active_leaks.csv', index=True)

        # Now repeat for emissions (which will actually be used for batch plotting)
        dfs = self.emission_dfs

        for i in range(len(dfs)):
            n_cols = dfs[i].shape[1]
            dfs[i]['mean'] = dfs[i].iloc[:, 0:n_cols].mean(axis=1)
            dfs[i]['std'] = dfs[i].iloc[:, 0:n_cols].std(axis=1)
            dfs[i]['low'] = dfs[i].iloc[:, 0:n_cols].quantile(0.025, axis=1)
            dfs[i]['high'] = dfs[i].iloc[:, 0:n_cols].quantile(0.975, axis=1)
            dfs[i]['program'] = self.directories[i]

            # Move reference program to the top of the list
        for i, df in enumerate(dfs):
            if df['program'].iloc[0] == self.ref_program:
                dfs.insert(0, dfs.pop(i))

        # Arrange dfs for plot 1
        dfs_p1 = dfs.copy()
        for i in range(len(dfs_p1)):
            # Reshape
            dfs_p1[i] = pd.melt(dfs_p1[i], id_vars=['datetime', 'mean',
                                                    'std', 'low', 'high', 'program'])

        # Combine dataframes into single dataframe for plotting
        df_p1 = pd.concat(dfs_p1, ignore_index=True)

        # Output Emissions df for other uses (e.g. live plot)
        df_p1.to_csv(self.output_directory + 'mean_emissions.csv', index=True)

        # Make plots from list of dataframes - one entry per dataframe
        pn.theme_set(pn.theme_linedraw())
        plot1 = (pn.ggplot(None) + pn.aes('datetime', 'value', group='program') +
                 pn.geom_ribbon(df_p1, pn.aes(ymin='low', ymax='high', fill='program'), alpha=0.2) +
                 pn.geom_line(df_p1, pn.aes('datetime', 'mean', colour='program'), size=1) +
                 pn.ylab('Daily emissions (kg/site)') + pn.xlab('') +
                 pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
                 pn.scale_x_datetime(labels=date_format('%Y')) +
                 pn.scale_y_continuous(trans='log10') +
                 pn.ggtitle('To reduce uncertainty, use more simulations.') +
                 pn.labs(color='Program', fill='Program') +
                 pn.theme(panel_border=pn.element_rect(colour="black", fill=None, size=2),
                          panel_grid_minor_x=pn.element_blank(),
                          panel_grid_major_x=pn.element_blank(),
                          panel_grid_minor_y=pn.element_line(
                              colour='black', linewidth=0.5, alpha=0.3),
                          panel_grid_major_y=pn.element_line(
                              colour='black', linewidth=1, alpha=0.5))
                 )
        plot1.save(self.output_directory + 'program_comparison.png', width=7, height=3, dpi=900)

        # Build relative mitigation plots
        dfs_p2 = dfs.copy()

        for i in dfs_p2[1:]:
            i['mean_dif'] = 0
            i['std_dif'] = 0
            i['mean_ratio'] = 0
            i['std_ratio'] = 0
            for j in range(len(i)):
                ref_mean = dfs_p2[0].loc[dfs_p2[0].index[j], 'mean']
                ref_std = dfs_p2[0].loc[dfs_p2[0].index[j], 'std']
                alt_mean = i.loc[i.index[j], 'mean']
                alt_std = i.loc[i.index[j], 'std']

                i.loc[i.index[j], 'mean_dif'] = alt_mean - ref_mean
                i.loc[i.index[j], 'std_dif'] = math.sqrt(
                    math.pow(alt_std, 2) + math.pow(ref_std, 2))
                i.loc[i.index[j], 'mean_ratio'] = alt_mean / ref_mean
                i.loc[i.index[j], 'std_ratio'] = math.sqrt(
                    math.pow((alt_std / alt_mean), 2) + math.pow((ref_std / ref_mean), 2))

        # Build plotting dataframe
        df_p2 = self.dates_trunc.copy().to_frame()
        df_p2['program'] = dfs_p2[1]['program']
        df_p2['mean_dif'] = dfs_p2[1]['mean_dif']
        df_p2['std_dif'] = dfs_p2[1]['std_dif']
        df_p2['mean_ratio'] = dfs_p2[1]['mean_ratio']
        df_p2['std_ratio'] = dfs_p2[1]['std_ratio']

        df_p2['low_dif'] = dfs_p2[1]['mean_dif'] - 2 * dfs_p2[1]['std_dif']
        df_p2['high_dif'] = dfs_p2[1]['mean_dif'] + 2 * dfs_p2[1]['std_dif']
        df_p2['low_ratio'] = dfs_p2[1]['mean_ratio'] / (
            dfs_p2[1]['mean_ratio'] + 2 * dfs_p2[1]['std_ratio'])
        df_p2['high_ratio'] = dfs_p2[1]['mean_ratio'] + 2 * dfs_p2[1]['std_ratio']

        pd.options.mode.chained_assignment = None
        for i in dfs_p2[2:]:
            i['low_dif'] = i['mean_dif'] - 2 * i['std_dif']
            i['high_dif'] = i['mean_dif'] + 2 * i['std_dif']
            i['low_ratio'] = i['mean_ratio'] / (i['mean_ratio'] + 2 * i['std_ratio'])
            i['high_ratio'] = i['mean_ratio'] + 2 * i['std_ratio']
            short_df = i[['program', 'mean_dif', 'std_dif', 'low_dif',
                          'high_dif', 'mean_ratio', 'std_ratio', 'low_ratio', 'high_ratio']]
            short_df['datetime'] = np.array(self.dates_trunc)
            df_p2 = pd.concat([df_p2, short_df], ignore_index=True)

        # Make plot 2
        plot2 = (pn.ggplot(None) + pn.aes('datetime', 'mean_dif', group='program') +
                 pn.geom_ribbon(
                     df_p2, pn.aes(ymin='low_dif', ymax='high_dif', fill='program'), alpha=0.2) +
                 pn.geom_line(df_p2, pn.aes('datetime', 'mean_dif', colour='program'), size=1) +
                 pn.ylab('Daily emissions difference (kg/site)') + pn.xlab('') +
                 pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
                 pn.scale_x_datetime(labels=date_format('%Y')) +
                 pn.ggtitle('Daily differences may be uncertain for small sample sizes') +
                 #        pn.scale_y_continuous(trans='log10') +
                 pn.labs(color='Program', fill='Program') +
                 pn.theme(panel_border=pn.element_rect(colour="black", fill=None, size=2),
                          panel_grid_minor_x=pn.element_blank(),
                          panel_grid_major_x=pn.element_blank(),
                          panel_grid_minor_y=pn.element_line(
                              colour='black', linewidth=0.5, alpha=0.3),
                          panel_grid_major_y=pn.element_line(
                              colour='black', linewidth=1, alpha=0.5))
                 )
        plot2.save(self.output_directory + 'relative_mitigation.png', width=7, height=3, dpi=900)

        # Make plot 3
        plot3 = (pn.ggplot(None) + pn.aes('datetime', 'mean_ratio', group='program') +
                 pn.geom_ribbon(df_p2, pn.aes(
                     ymin='low_ratio', ymax='high_ratio', fill='program'), alpha=0.2) +
                 pn.geom_hline(yintercept=1, size=0.5, colour='blue') +
                 pn.geom_line(df_p2, pn.aes('datetime', 'mean_ratio', colour='program'), size=1) +
                 pn.ylab('Emissions ratio') + pn.xlab('') +
                 pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
                 pn.scale_x_datetime(labels=date_format('%Y')) +
                 pn.ggtitle(
                     'Blue line represents equivalence. \nIf uncertainty is high, use more '
                     'simulations and/or sites. \nLook also at the ratio of mean daily emissions '
                     'over the entire timeseries.') +
                 pn.labs(color='Program', fill='Program') +
                 pn.theme(panel_border=pn.element_rect(colour="black", fill=None, size=2),
                          panel_grid_minor_x=pn.element_blank(),
                          panel_grid_major_x=pn.element_blank(),
                          panel_grid_minor_y=pn.element_line(
                              colour='black', linewidth=0.5, alpha=0.3),
                          panel_grid_major_y=pn.element_line(
                              colour='black', linewidth=1, alpha=0.5))
                 )
        plot3.save(self.output_directory + 'relative_mitigation2.png', width=7, height=3, dpi=900)

        # ---------------------------------------
        # ------ Figure to compare costs  ------
        dfs = self.cost_dfs

        for i in range(len(dfs)):
            n_cols = dfs[i].shape[1]
            dfs[i]['mean'] = dfs[i].iloc[:, 0:n_cols].mean(axis=1)
            dfs[i]['std'] = dfs[i].iloc[:, 0:n_cols].std(axis=1)
            dfs[i]['low'] = dfs[i].iloc[:, 0:n_cols].quantile(0.025, axis=1)
            dfs[i]['high'] = dfs[i].iloc[:, 0:n_cols].quantile(0.975, axis=1)
            dfs[i]['program'] = self.directories[i]

        # Move reference program to the top of the list
        for i, df in enumerate(dfs):
            if df['program'].iloc[0] == self.ref_program:
                dfs.insert(0, dfs.pop(i))

        # Arrange dfs for plot 1
        dfs_p1 = dfs.copy()
        for i in range(len(dfs_p1)):
            # Reshape
            dfs_p1[i] = pd.melt(dfs_p1[i], id_vars=['datetime', 'mean',
                                                    'std', 'low', 'high', 'program'])

        # Combine dataframes into single dataframe for plotting
        df_p1 = pd.concat(dfs_p1, ignore_index=True)

        # Output Emissions df for other uses (e.g. live plot)
        df_p1.to_csv(self.output_directory + 'rolling_cost_estimates.csv', index=True)

        # Make plots from list of dataframes - one entry per dataframe
        pn.theme_set(pn.theme_linedraw())
        plot1 = (pn.ggplot(None) + pn.aes('datetime', 'value', group='program') +
                 pn.geom_ribbon(df_p1, pn.aes(ymin='low', ymax='high', fill='program'), alpha=0.2) +
                 pn.geom_line(df_p1, pn.aes('datetime', 'mean', colour='program'), size=1) +
                 pn.ylab('Estimated cost per facility') + pn.xlab('') +
                 pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
                 pn.scale_x_datetime(labels=date_format('%Y')) +
                 # pn.scale_y_continuous(trans='log10') +
                 pn.labs(color='Program', fill='Program') +
                 pn.theme(panel_border=pn.element_rect(colour="black", fill=None, size=2),
                          panel_grid_minor_x=pn.element_blank(),
                          panel_grid_major_x=pn.element_blank(),
                          panel_grid_minor_y=pn.element_line(
                              colour='black', linewidth=0.5, alpha=0.3),
                          panel_grid_major_y=pn.element_line(
                              colour='black', linewidth=1, alpha=0.5))
                 )
        plot1.save(self.output_directory + 'cost_estimate_temporal.png', width=7, height=3, dpi=900)

        ########################################
        # Cost breakdown by program and method
        method_lists = []
        for i in range(len(self.directories)):
            df = pd.read_csv(
                self.output_directory + self.directories[i] + "/timeseries_output_0.csv")
            df = df.filter(regex='cost$', axis=1)
            df = df.drop(columns=["total_daily_cost"])
            method_lists.append(list(df))

        costs = [[] for i in range(len(self.all_data))]
        for i in range(len(self.all_data)):
            for j in range(len(self.all_data[i])):
                simcosts = []
                for k in range(len(method_lists[i])):
                    timesteps = len(self.all_data[i][j][method_lists[i][k]])
                    simcosts.append(
                        (sum(self.all_data[i][j][method_lists[i][k]])/timesteps/self.n_sites)*365)
                costs[i].append(simcosts)

        rows_list = []
        for i in range(len(costs)):
            df_temp = pd.DataFrame(costs[i])
            for j in range(len(df_temp.columns)):
                # Build each row as a single dict (avoids shadowing the builtin `dict`)
                rows_list.append({
                    'Program': self.directories[i],
                    'Mean Cost': round(df_temp.iloc[:, j].mean()),
                    'St. Dev.': df_temp.iloc[:, j].std(),
                    'Method': method_lists[i][j].replace('_cost', ''),
                })
        df = pd.DataFrame(rows_list)

        # Output Emissions df for other uses
        df.to_csv(self.output_directory + 'cost_comparison.csv', index=True)

        plot = (
            pn.ggplot(
                df, pn.aes(
                    x='Program', y='Mean Cost', fill='Method', label='Mean Cost')) +
            pn.geom_bar(stat="identity") + pn.ylab('Cost per Site per Year') + pn.xlab('Program') +
            pn.scale_fill_hue(h=0.15, l=0.25, s=0.9) +
            pn.geom_text(size=15, position=pn.position_stack(vjust=0.5)) +
            pn.theme(
                panel_border=pn.element_rect(colour="black", fill=None, size=2),
                panel_grid_minor_x=pn.element_blank(),
                panel_grid_major_x=pn.element_blank(),
                panel_grid_minor_y=pn.element_line(
                    colour='black', linewidth=0.5, alpha=0.3),
                panel_grid_major_y=pn.element_line(
                    colour='black', linewidth=1, alpha=0.5)))
        plot.save(self.output_directory + 'cost_comparison.png', width=7, height=3, dpi=900)

        return
Example #9
import os
from pathlib import Path
import re
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ipywidgets as ipw
from sklearn import mixture
import skimage.filters
from IPython.display import display, clear_output
import napari

import plotnine as pn
from plotnine import ggplot, geom_point, aes, geom_line, labels

pn.theme_set(pn.theme_classic(base_size=18, base_family="Helvetica"))

font = {
    "family": "sans-serif",
    "color": "black",
    "weight": "normal",
    "size": 16,
}


class Analysis(Bact):
    def __init__(self):
        """Standard __init__ method.

        Parameters
        ----------
Example #10
### Setup
import pandas as pd
import numpy as np
import plotnine as p9
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import itertools
import seaborn as sns
import matplotlib.pyplot as plt
from dimension_reduction_fx import plot_pca_vis, pca_df, pca_plot, sort_df
#import umap #Not used in final version

#Define default plot theme:
p9.theme_set(p9.theme_classic())

#Read in data:
jobs_df = pd.read_csv("./data/jobs_df_clean.csv")
skills_summary_df = pd.read_csv("./data/skills_summary_df.csv")

#Clean jobs data
#jobs_df.columns #Inspect columns
jobs_df = jobs_df.fillna(value="None")
jobs_df['title'] = jobs_df['title'].str.lower()

#Assign 'type' based on simple rules:
jobs_df['type'] = '0'  # placeholder for titles that match none of the rules below
jobs_df.loc[jobs_df['title'].str.contains('analyst'), 'type'] = 'analyst'
jobs_df.loc[jobs_df['title'].str.contains('engineer'), 'type'] = 'engineer'
jobs_df.loc[jobs_df['title'].str.contains('scientist'), 'type'] = 'scientist'
jobs_df.loc[jobs_df['title'].str.contains('manager'), 'type'] = 'manager'
jobs_df.loc[jobs_df['title'].str.contains('director'), 'type'] = 'manager'
Example #11
def make_plots(leak_df, time_df, site_df, sim_n, spin_up, output_directory):
    """
    This function makes a set of standard plots to output at end of simulation.
    """
    # Temporarily mute warnings
    warnings.filterwarnings('ignore')
    pn.theme_set(pn.theme_linedraw())

    # Chop off spin-up year (only for plots, still exists in raw output)
    time_df_adj = time_df.iloc[spin_up:, ]

    # Timeseries plots
    plot_time_1 = (
        pn.ggplot(time_df_adj, pn.aes('datetime', 'daily_emissions_kg')) +
        pn.geom_line(size=2) +
        pn.ggtitle('Daily emissions from all sites (kg)') + pn.ylab('') +
        pn.xlab('') + pn.scale_x_datetime(labels=date_format('%Y')) + pn.theme(
            panel_border=pn.element_rect(colour="black", fill=None, size=2),
            panel_grid_minor_x=pn.element_blank(),
            panel_grid_major_x=pn.element_blank(),
            panel_grid_minor_y=pn.element_line(
                colour='black', linewidth=0.5, alpha=0.3),
            panel_grid_major_y=pn.element_line(
                colour='black', linewidth=1, alpha=0.5)))

    plot_time_1.save(output_directory + '/plot_time_emissions_' + sim_n +
                     '.png',
                     width=10,
                     height=3,
                     dpi=300)

    plot_time_2 = (pn.ggplot(time_df_adj, pn.aes('datetime', 'active_leaks')) +
                   pn.geom_line(size=2) +
                   pn.ggtitle('Number of active leaks at all sites') +
                   pn.ylab('') + pn.xlab('') +
                   pn.scale_x_datetime(labels=date_format('%Y')) +
                   pn.theme(panel_border=pn.element_rect(
                       colour="black", fill=None, size=2),
                            panel_grid_minor_x=pn.element_blank(),
                            panel_grid_major_x=pn.element_blank(),
                            panel_grid_minor_y=pn.element_line(
                                colour='black', linewidth=0.5, alpha=0.3),
                            panel_grid_major_y=pn.element_line(
                                colour='black', linewidth=1, alpha=0.5)))

    plot_time_2.save(output_directory + '/plot_time_active_' + sim_n + '.png',
                     width=10,
                     height=3,
                     dpi=300)

    # Site-level plots
    plot_site_1 = (
        pn.ggplot(site_df, pn.aes('cum_frac_sites', 'cum_frac_emissions')) +
        pn.geom_line(size=2) + pn.theme(
            panel_border=pn.element_rect(colour="black", fill=None, size=2),
            panel_grid_minor_x=pn.element_blank(),
            panel_grid_major_x=pn.element_blank(),
            panel_grid_minor_y=pn.element_line(
                colour='black', linewidth=0.5, alpha=0.3),
            panel_grid_major_y=pn.element_line(
                colour='black', linewidth=1, alpha=0.5)) +
        pn.xlab('Cumulative fraction of sites') +
        pn.ylab('Cumulative fraction of emissions') +
        pn.ggtitle('Empirical cumulative distribution of site-level emissions')
    )

    plot_site_1.save(output_directory + '/site_cum_dist_' + sim_n + '.png',
                     width=5,
                     height=4,
                     dpi=300)

    # Leak plots
    plot_leak_1 = (pn.ggplot(leak_df, pn.aes('days_active')) +
                   pn.geom_histogram(colour='gray') +
                   pn.theme(panel_border=pn.element_rect(
                       colour="black", fill=None, size=2),
                            panel_grid_minor_x=pn.element_blank(),
                            panel_grid_major_x=pn.element_blank(),
                            panel_grid_minor_y=pn.element_line(
                                colour='black', linewidth=0.5, alpha=0.3),
                            panel_grid_major_y=pn.element_line(
                                colour='black', linewidth=1, alpha=0.5)) +
                   pn.ggtitle('Distribution of leak duration') +
                   pn.xlab('Number of days the leak was active') +
                   pn.ylab('Count'))
    plot_leak_1.save(output_directory + '/leak_active_hist' + sim_n + '.png',
                     width=5,
                     height=4,
                     dpi=300)

    plot_leak_2 = (pn.ggplot(
        leak_df, pn.aes('cum_frac_leaks', 'cum_frac_rate', colour='status')) +
                   pn.geom_line(size=2) +
                   pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
                   pn.theme(panel_border=pn.element_rect(
                       colour="black", fill=None, size=2),
                            panel_grid_minor_x=pn.element_blank(),
                            panel_grid_major_x=pn.element_blank(),
                            panel_grid_minor_y=pn.element_line(
                                colour='black', linewidth=0.5, alpha=0.3),
                            panel_grid_major_y=pn.element_line(
                                colour='black', linewidth=1, alpha=0.5)) +
                   pn.xlab('Cumulative fraction of leak sources') +
                   pn.ylab('Cumulative leak rate fraction') +
                   pn.ggtitle('Fractional cumulative distribution'))

    plot_leak_2.save(output_directory + '/leak_cum_dist1_' + sim_n + '.png',
                     width=4,
                     height=4,
                     dpi=300)

    plot_leak_3 = (pn.ggplot(
        leak_df, pn.aes('cum_frac_leaks', 'cum_rate', colour='status')) +
                   pn.geom_line(size=2) +
                   pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
                   pn.theme(panel_border=pn.element_rect(
                       colour="black", fill=None, size=2),
                            panel_grid_minor_x=pn.element_blank(),
                            panel_grid_major_x=pn.element_blank(),
                            panel_grid_minor_y=pn.element_line(
                                colour='black', linewidth=0.5, alpha=0.3),
                            panel_grid_major_y=pn.element_line(
                                colour='black', linewidth=1, alpha=0.5)) +
                   pn.scale_y_continuous(trans='log10') +
                   pn.xlab('Cumulative fraction of leak sources') +
                   pn.ylab('Cumulative emissions (kg/day)') +
                   pn.ggtitle('Absolute cumulative distribution'))

    plot_leak_3.save(output_directory + '/leak_cum_dist2_' + sim_n + '.png',
                     width=4,
                     height=4,
                     dpi=300)

    return
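
# Hypothetical call at the end of a simulation run, not part of the original
# snippet. leak_df, time_df, and site_df are the simulation output frames and
# must already contain the columns referenced above ('datetime',
# 'daily_emissions_kg', 'active_leaks', 'cum_frac_sites', 'cum_frac_emissions',
# 'days_active', 'cum_frac_leaks', 'cum_frac_rate', 'cum_rate', 'status');
# sim_n is a string used in the output file names.
make_plots(leak_df, time_df, site_df, sim_n='0',
           spin_up=365, output_directory='./outputs')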
Example #12
#!/usr/bin/env python
# coding: utf-8

import pandas as pd
import plotnine as p9
from pyprojroot import here

p9.theme_set(p9.theme_minimal())
print(f"plotnine=={p9.__version__}")


df = pd.read_csv(here() / ".data" / "titanic.csv")
df.head(3)


# ## Univariate, Continuous Distribution

# ### Histogram

(
    p9.ggplot(df[~df["age"].isna()], p9.aes(x="age"))
    + p9.geom_histogram(binwidth=5)
    + p9.ggtitle("Histogram")
)


# ## ECDF

(
    p9.ggplot(df[~df["age"].isna()], p9.aes(x="age"))
    + p9.stat_ecdf()