def pca_plot(pca_data: pd.DataFrame, dim1: str, dim2: str, dim3: str):
    """Return a plotnine scatter plot of two principal components.

    Parameters
    ----------
    pca_data: DataFrame of PCA-transformed data (one column per component,
        plus any label columns).
    dim1: Column name of the principal component to plot on the x-axis.
    dim2: Column name of the principal component to plot on the y-axis.
    dim3: Column name of the column mapped to fill colour.

    Returns
    ----------
    plotnine ggplot with dim1 on x-axis, dim2 on y-axis, and coloured by dim3.
    """
    #Set plot theme within function:
    p9.theme_set(p9.theme_classic())
    # Positional lookup: the original used .loc[0, dim3], which raises
    # KeyError when the frame does not have a label 0 in its index.
    first_colour_value = pca_data[dim3].iloc[0]
    p = (p9.ggplot(pca_data, p9.aes(x=dim1, y=dim2, fill=dim3))
         + p9.geom_point())
    # isinstance instead of `type(x) == str`; also covers str subclasses.
    if isinstance(first_colour_value, str):
        print('color type is qualitative')
        #Can't find a better colour palette yet.
        #p = p + (p9.scale_fill_brewer(type="qual", palette='Accent'))
    return p
def derplot(adata=None, filename='derplot', embedding='tsne',
            feature='sample_type_tech', size=(12, 12), save=False,
            draw=False, psize=1):
    """Scatter plot of a 2-D embedding stored in ``adata.obs``.

    Parameters
    ----------
    adata: Object whose ``.obs`` dataframe holds the columns
        ``<embedding>0`` and ``<embedding>1`` plus ``feature``
        (presumably an AnnData object — TODO confirm with callers).
    filename: Stem used to build the output file name.
    embedding: Embedding column prefix, e.g. 'tsne'.
    feature: Column of ``adata.obs`` used to colour points.
    size: Figure size set on plotnine's global options.
    save: If True, save the plot as a PNG.
    draw: If True, render the figure (this flag was previously accepted
        but silently ignored — now honored).
    psize: Point size.

    Returns
    -------
    The plotnine ggplot object (also a backward-compatible addition;
    the original returned None).
    """
    start = datetime.datetime.now()
    # Global plotnine option; note it is not restored afterwards.
    p.options.figure_size = size
    savename = filename + '.' + embedding + '.' + feature + '.derplot.png'
    print(
        start.strftime("%H:%M:%S"),
        'Starting ... \t',
        savename,
    )
    p.theme_set(p.theme_classic())
    pt = (
        p.ggplot(p.aes(embedding + '0', embedding + '1', color=feature), adata.obs)
        + p.geom_point(size=psize, alpha=1, stroke=0)
        # Enlarge legend keys so categories stay readable at tiny point sizes.
        + p.guides(color=p.guide_legend(override_aes={'size': 15}))
    )
    if save:
        pt.save(savename, format='png', dpi=200)
    if draw:
        # Bug fix: `draw` was previously unused.
        pt.draw()
    end = datetime.datetime.now()
    delta = end - start
    # NOTE(review): logs the *start* timestamp next to the elapsed seconds;
    # end.strftime may have been intended — kept as-is to preserve output.
    print(start.strftime("%H:%M:%S"),
          str(int(delta.total_seconds())),
          's to make: \t',
          savename)
    return pt
def plot_pca_vis(pca: PCA, df: pd.DataFrame, pc_x: int = 0, pc_y: int = 1, num_dims: int = 5) -> plt: """ Plot contribution of different dimensions to principal components. Parameters ---------- pca: Fitted pca object to plot. df: Dataframe pca was fit on. Used for column names. pc_x: Index of principal component to plot on x-axis. pc_y: Index of principal component to plot on y-axis. num_dims: Number of contributing elements to include for each axis. Returns ---------- Null Prints matplotlib.plt object. https://stackoverflow.com/questions/45148539/project-variables-in-pca-plot-in-python Adapted into function by Tim Cashion """ #Set plot theme within function: p9.theme_set(p9.theme_classic()) # Get the PCA components (loadings) PCs = pca.components_ PC_x_index = PCs[pc_x, : ].argsort()[-num_dims:][::-1] PC_y_index = PCs[pc_y, : ].argsort()[-num_dims:][::-1] combined_index = set(list(PC_x_index) + list(PC_y_index)) combined_index = sorted(combined_index) PCs = PCs[:, combined_index] # Use quiver to generate the basic plot fig = plt.figure(figsize=(5,5)) plt.quiver(np.zeros(PCs.shape[1]), np.zeros(PCs.shape[1]), PCs[pc_x,:], PCs[pc_y,:], angles='xy', scale_units='xy', scale=1) # Add labels based on feature names (here just numbers) feature_names = df.columns[combined_index] for i,j,z in zip(PCs[pc_y,:]+0.02, PCs[pc_x,:]+0.02, feature_names): plt.text(j, i, z, ha='center', va='center') # Add unit circle circle = plt.Circle((0,0), 1, facecolor='none', edgecolor='b') plt.gca().add_artist(circle) # Ensure correct aspect ratio and axis limits plt.axis('equal') plt.xlim([-1.0,1.0]) plt.ylim([-1.0,1.0]) # Label axes plt.xlabel('PC ' + str(pc_x)) plt.ylabel('PC ' + str(pc_y)) plt.tight_layout() return plt
def plot_fusion(self):
    """Save a tSNE scatter plot of fusion count for each reference.

    Writes one PDF per reference in ``self.pos_dict`` that has a
    matching column in ``self.df_tsne``.
    """
    p9.theme_set(p9.theme_void())
    for ref in self.pos_dict:
        # Skip references with no corresponding tSNE column.
        if ref not in self.df_tsne.columns:
            continue
        figure = (
            p9.ggplot(self.df_tsne, p9.aes(x="tSNE_1", y="tSNE_2", color=ref))
            + p9.geom_point(size=0.2)
            + p9.theme_bw()
            + p9.scale_color_gradient(low="lightgrey", high="blue")
        )
        figure.save(f'{self.out_prefix}_{ref}_fusion.pdf')
import argparse import importlib import collections import os import sys import numpy as np import pandas as pd import plotnine as gg import base.plot as bp from base import config_lib sys.path.append(os.getcwd()) gg.theme_set(gg.theme_bw(base_size=16, base_family='serif')) # FIGURE_OPTIONS will hold all of the details for specific details to reproduce # each figure. These include the config, number of jobs and the plot function. # Figures are named with reference to "A Tutorial on Thompson Sampling": # https://arxiv.org/abs/1707.02038. FigureOptions = collections.namedtuple( 'FigureOptions', ['fig_name', 'config', 'paper_n_jobs', 'plot_fun']) FIGURE_OPTIONS = collections.OrderedDict([ [ '3', FigureOptions(fig_name='3', config='finite_arm.config_simple', paper_n_jobs=20000,
job_config = config_lib.get_job_config(config, job_id) experiment = job_config['experiment'] experiment.run_experiment() results.append(experiment.results) ############################################################################# # Collating data with Pandas params_df = config_lib.get_params_df(config) df = pd.merge(pd.concat(results), params_df, on='unique_id') plt_df = (df.groupby(['agent', 't']).agg({ 'instant_regret': np.mean }).reset_index()) ############################################################################# # Plotting and analysis (uses plotnine by default) """ gg.theme_set(gg.theme_bw(base_size=16, base_family='serif')) gg.theme_update(figure_size=(12, 8)) p = (gg.ggplot(plt_df) + gg.aes('t', 'instant_regret', colour='agent') + gg.geom_line()) print(p) """ """ plt_df_cum_regret = (df.groupby(['agent', 't']) .agg({'cum_regret': np.mean}) .reset_index()) q = (gg.ggplot(plt_df_cum_regret) + gg.aes('t', 'cum_regret', colour='agent')
def make_plots(): # Setup plotting import pandas as pd import plotnine as gg import warnings pd.options.mode.chained_assignment = None gg.theme_set(gg.theme_bw(base_size=16, base_family='serif')) gg.theme_update(figure_size=(12, 8), panel_spacing_x=0.5, panel_spacing_y=0.5) warnings.filterwarnings('ignore') # Load Results experiments = { 'Random': RANDOM_RESULTS_PATH, 'TRPO': TRPO_RESULTS_PATH, } data_frame, sweep_vars = csv_load.load_bsuite(experiments) bsuite_score = summary_analysis.bsuite_score(data_frame, sweep_vars) bsuite_summary = summary_analysis.ave_score_by_tag(bsuite_score, sweep_vars) # Generate general plots radar_fig = summary_analysis.bsuite_radar_plot(bsuite_summary, sweep_vars) radar_fig.savefig(PLOTS_PATH + 'radar_fig.png', bbox_inches='tight') bar_fig = summary_analysis.bsuite_bar_plot(bsuite_score, sweep_vars) bar_fig.save(PLOTS_PATH + 'bar_fig.png') compare_bar_fig = summary_analysis.bsuite_bar_plot_compare( bsuite_score, sweep_vars) compare_bar_fig.save(PLOTS_PATH + 'compare_bar_fig.png') # Generate specific analyses # Learning performance from bsuite.experiments.bandit import analysis as bandit_analysis bandit_df = data_frame[data_frame.bsuite_env == 'bandit'].copy() bandit_scores = summary_analysis.plot_single_experiment( bsuite_score, 'bandit', sweep_vars) bandit_scores.save(PLOTS_PATH + 'bandits_scores.png') bandit_convergence = bandit_analysis.plot_learning(bandit_df, sweep_vars) bandit_convergence.save(PLOTS_PATH + 'bandits_convergence.png') bandit_seeds = bandit_analysis.plot_seeds(bandit_df, sweep_vars) bandit_seeds.save(PLOTS_PATH + 'bandits_seeds.png') # Robustness to noise from bsuite.experiments.bandit_noise import analysis as bandit_noise_analysis bandit_noise_df = data_frame[data_frame.bsuite_env == 'bandit_noise'].copy() bandit_noise_overall = summary_analysis.plot_single_experiment( bsuite_score, 'bandit_noise', sweep_vars) bandit_noise_overall.save(PLOTS_PATH + 'bandits_noise_overall.png') bandit_noise_avg = 
bandit_noise_analysis.plot_average( bandit_noise_df, sweep_vars) bandit_noise_avg.save(PLOTS_PATH + 'bandits_noise_avg.png') bandit_noise_regret = bandit_noise_analysis.plot_learning( bandit_noise_df, sweep_vars) bandit_noise_regret.save(PLOTS_PATH + 'bandits_noise_regret.png') # Robustness to reward scaling from bsuite.experiments.bandit_scale import analysis as bandit_scale_analysis bandit_scale_df = data_frame[data_frame.bsuite_env == 'bandit_scale'].copy() bandit_scale_overall = summary_analysis.plot_single_experiment( bsuite_score, 'bandit_scale', sweep_vars) bandit_scale_overall.save(PLOTS_PATH + 'bandits_scale_overall.png') bandit_scale_avg = bandit_scale_analysis.plot_average( bandit_scale_df, sweep_vars) bandit_scale_avg.save(PLOTS_PATH + 'bandits_scale_avg.png') bandit_scale_learn = bandit_scale_analysis.plot_learning( bandit_scale_df, sweep_vars) bandit_scale_learn.save(PLOTS_PATH + 'bandits_scale_learn.png') """
def batch_plots(self):
    """Generate cross-program comparison plots and CSV outputs for a batch run.

    Outputs (written to ``self.output_directory``):
      * mean_active_leaks.csv — for live plotting (no AL plot here currently)
      * mean_emissions.csv and program_comparison.png
      * relative_mitigation.png / relative_mitigation2.png
      * rolling_cost_estimates.csv and cost_estimate_temporal.png
      * cost_comparison.csv and cost_comparison.png
    """

    def _summarize(dfs):
        # Add mean/std/2.5%/97.5% summary columns across the simulation
        # columns of each dataframe and tag it with its program name, then
        # move the reference program to the front (mutates the list in place).
        for idx in range(len(dfs)):
            n_cols = dfs[idx].shape[1]
            dfs[idx]['mean'] = dfs[idx].iloc[:, 0:n_cols].mean(axis=1)
            dfs[idx]['std'] = dfs[idx].iloc[:, 0:n_cols].std(axis=1)
            dfs[idx]['low'] = dfs[idx].iloc[:, 0:n_cols].quantile(0.025, axis=1)
            dfs[idx]['high'] = dfs[idx].iloc[:, 0:n_cols].quantile(0.975, axis=1)
            dfs[idx]['program'] = self.directories[idx]
        for idx, frame in enumerate(dfs):
            if frame['program'].iloc[0] == self.ref_program:
                dfs.insert(0, dfs.pop(idx))
        return dfs

    def _combine_long(dfs):
        # Melt each summarized dataframe to long format and stack them into a
        # single dataframe for plotting. pd.concat replaces the original
        # DataFrame.append chain, which was removed in pandas 2.0.
        melted = [
            pd.melt(frame, id_vars=['datetime', 'mean', 'std', 'low', 'high', 'program'])
            for frame in dfs
        ]
        return pd.concat(melted, ignore_index=True)

    def _grid_theme():
        # Shared panel-border / grid styling used by every plot below.
        return pn.theme(
            panel_border=pn.element_rect(colour="black", fill=None, size=2),
            panel_grid_minor_x=pn.element_blank(),
            panel_grid_major_x=pn.element_blank(),
            panel_grid_minor_y=pn.element_line(
                colour='black', linewidth=0.5, alpha=0.3),
            panel_grid_major_y=pn.element_line(
                colour='black', linewidth=1, alpha=0.5))

    # First, put together active leak data and output for live plotting
    # functionality (no AL plot here currently).
    dfs = _summarize(self.active_leak_dfs)
    df_p1 = _combine_long(dfs)
    df_p1.to_csv(self.output_directory + 'mean_active_leaks.csv', index=True)

    # Now repeat for emissions (which will actually be used for batch plotting).
    dfs = _summarize(self.emission_dfs)
    df_p1 = _combine_long(dfs)
    df_p1.to_csv(self.output_directory + 'mean_emissions.csv', index=True)

    # Make plots from list of dataframes - one entry per dataframe
    pn.theme_set(pn.theme_linedraw())
    plot1 = (pn.ggplot(None) + pn.aes('datetime', 'value', group='program') +
             pn.geom_ribbon(df_p1, pn.aes(ymin='low', ymax='high', fill='program'), alpha=0.2) +
             pn.geom_line(df_p1, pn.aes('datetime', 'mean', colour='program'), size=1) +
             pn.ylab('Daily emissions (kg/site)') +
             pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.scale_y_continuous(trans='log10') +
             pn.ggtitle('To reduce uncertainty, use more simulations.') +
             pn.labs(color='Program', fill='Program') +
             _grid_theme())
    plot1.save(self.output_directory + 'program_comparison.png',
               width=7, height=3, dpi=900)

    # Build relative mitigation plots: per-date difference and ratio of each
    # alternative program against the reference program (index 0).
    dfs_p2 = dfs.copy()
    for alt in dfs_p2[1:]:
        alt['mean_dif'] = 0
        alt['std_dif'] = 0
        alt['mean_ratio'] = 0
        alt['std_ratio'] = 0
        for j in range(len(alt)):
            ref_mean = dfs_p2[0].loc[dfs_p2[0].index[j], 'mean']
            ref_std = dfs_p2[0].loc[dfs_p2[0].index[j], 'std']
            alt_mean = alt.loc[alt.index[j], 'mean']
            alt_std = alt.loc[alt.index[j], 'std']
            alt.loc[alt.index[j], 'mean_dif'] = alt_mean - ref_mean
            # Uncertainty propagation: absolute errors add in quadrature for
            # differences, relative errors for ratios.
            alt.loc[alt.index[j], 'std_dif'] = math.sqrt(
                math.pow(alt_std, 2) + math.pow(ref_std, 2))
            alt.loc[alt.index[j], 'mean_ratio'] = alt_mean / ref_mean
            alt.loc[alt.index[j], 'std_ratio'] = math.sqrt(
                math.pow((alt_std / alt_mean), 2) + math.pow((ref_std / ref_mean), 2))

    # Build plotting dataframe, seeded from the first alternative program.
    df_p2 = self.dates_trunc.copy().to_frame()
    df_p2['program'] = dfs_p2[1]['program']
    df_p2['mean_dif'] = dfs_p2[1]['mean_dif']
    df_p2['std_dif'] = dfs_p2[1]['std_dif']
    df_p2['mean_ratio'] = dfs_p2[1]['mean_ratio']
    df_p2['std_ratio'] = dfs_p2[1]['std_ratio']
    df_p2['low_dif'] = dfs_p2[1]['mean_dif'] - 2 * dfs_p2[1]['std_dif']
    df_p2['high_dif'] = dfs_p2[1]['mean_dif'] + 2 * dfs_p2[1]['std_dif']
    df_p2['low_ratio'] = dfs_p2[1]['mean_ratio'] / (dfs_p2[1]
                                                    ['mean_ratio'] + 2 * dfs_p2[1]['std_ratio'])
    df_p2['high_ratio'] = dfs_p2[1]['mean_ratio'] + 2 * dfs_p2[1]['std_ratio']
    pd.options.mode.chained_assignment = None
    for alt in dfs_p2[2:]:
        alt['low_dif'] = alt['mean_dif'] - 2 * alt['std_dif']
        alt['high_dif'] = alt['mean_dif'] + 2 * alt['std_dif']
        alt['low_ratio'] = alt['mean_ratio'] / (alt['mean_ratio'] + 2 * alt['std_ratio'])
        alt['high_ratio'] = alt['mean_ratio'] + 2 * alt['std_ratio']
        short_df = alt[['program', 'mean_dif', 'std_dif', 'low_dif', 'high_dif',
                        'mean_ratio', 'std_ratio', 'low_ratio', 'high_ratio']]
        short_df['datetime'] = np.array(self.dates_trunc)
        # pandas 2.0 removed DataFrame.append; concat is the replacement.
        df_p2 = pd.concat([df_p2, short_df], ignore_index=True)

    # Make plot 2
    plot2 = (pn.ggplot(None) + pn.aes('datetime', 'mean_dif', group='program') +
             pn.geom_ribbon(df_p2, pn.aes(ymin='low_dif', ymax='high_dif', fill='program'), alpha=0.2) +
             pn.geom_line(df_p2, pn.aes('datetime', 'mean_dif', colour='program'), size=1) +
             pn.ylab('Daily emissions difference (kg/site)') +
             pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.ggtitle('Daily differences may be uncertain for small sample sizes') +
             # pn.scale_y_continuous(trans='log10') +
             pn.labs(color='Program', fill='Program') +
             _grid_theme())
    plot2.save(self.output_directory + 'relative_mitigation.png',
               width=7, height=3, dpi=900)

    # Make plot 3
    plot3 = (pn.ggplot(None) + pn.aes('datetime', 'mean_ratio', group='program') +
             pn.geom_ribbon(df_p2, pn.aes(ymin='low_ratio', ymax='high_ratio', fill='program'), alpha=0.2) +
             pn.geom_hline(yintercept=1, size=0.5, colour='blue') +
             pn.geom_line(df_p2, pn.aes('datetime', 'mean_ratio', colour='program'), size=1) +
             pn.ylab('Emissions ratio') +
             pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.ggtitle(
                 'Blue line represents equivalence. \nIf uncertainty is high, use more '
                 'simulations and/or sites. \nLook also at ratio of mean daily emissions'
                 'over entire timeseries.') +
             pn.labs(color='Program', fill='Program') +
             _grid_theme())
    plot3.save(self.output_directory + 'relative_mitigation2.png',
               width=7, height=3, dpi=900)

    # ---------------------------------------
    # ------ Figure to compare costs ------
    dfs = _summarize(self.cost_dfs)
    df_p1 = _combine_long(dfs)
    df_p1.to_csv(self.output_directory + 'rolling_cost_estimates.csv', index=True)

    # Make plots from list of dataframes - one entry per dataframe
    pn.theme_set(pn.theme_linedraw())
    plot1 = (pn.ggplot(None) + pn.aes('datetime', 'value', group='program') +
             pn.geom_ribbon(df_p1, pn.aes(ymin='low', ymax='high', fill='program'), alpha=0.2) +
             pn.geom_line(df_p1, pn.aes('datetime', 'mean', colour='program'), size=1) +
             pn.ylab('Estimated cost per facility') +
             pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             # pn.scale_y_continuous(trans='log10') +
             pn.labs(color='Program', fill='Program') +
             _grid_theme())
    plot1.save(self.output_directory + 'cost_estimate_temporal.png',
               width=7, height=3, dpi=900)

    ########################################
    # Cost breakdown by program and method
    method_lists = []
    for i in range(len(self.directories)):
        df = pd.read_csv(
            self.output_directory + self.directories[i] + "/timeseries_output_0.csv")
        df = df.filter(regex='cost$', axis=1)
        df = df.drop(columns=["total_daily_cost"])
        method_lists.append(list(df))

    # Annualized per-site cost for every (program, simulation, method) triple.
    costs = [[] for _ in range(len(self.all_data))]
    for i in range(len(self.all_data)):
        for j in range(len(self.all_data[i])):
            simcosts = []
            for k in range(len(method_lists[i])):
                timesteps = len(self.all_data[i][j][method_lists[i][k]])
                simcosts.append(
                    (sum(self.all_data[i][j][method_lists[i][k]]) / timesteps / self.n_sites) * 365)
            costs[i].append(simcosts)

    rows_list = []
    for i in range(len(costs)):
        df_temp = pd.DataFrame(costs[i])
        for j in range(len(df_temp.columns)):
            # `row` instead of the original `dict`, which shadowed the builtin.
            row = {
                'Program': self.directories[i],
                'Mean Cost': round(df_temp.iloc[:, j].mean()),
                'St. Dev.': df_temp.iloc[:, j].std(),
                'Method': method_lists[i][j].replace('_cost', ''),
            }
            rows_list.append(row)
    df = pd.DataFrame(rows_list)

    # Output Emissions df for other uses
    df.to_csv(self.output_directory + 'cost_comparison.csv', index=True)

    plot = (
        pn.ggplot(
            df,
            pn.aes(x='Program', y='Mean Cost', fill='Method', label='Mean Cost')) +
        pn.geom_bar(stat="identity") +
        pn.ylab('Cost per Site per Year') +
        pn.xlab('Program') +
        pn.scale_fill_hue(h=0.15, l=0.25, s=0.9) +
        pn.geom_text(size=15, position=pn.position_stack(vjust=0.5)) +
        _grid_theme())
    plot.save(self.output_directory + 'cost_comparison.png',
              width=7, height=3, dpi=900)
    return
import os from pathlib import Path import re import matplotlib.pyplot as plt import numpy as np import pandas as pd import ipywidgets as ipw from sklearn import mixture import skimage.filters from IPython.display import display, clear_output import napari import plotnine as pn from plotnine import ggplot, geom_point, aes, geom_line, labels pn.theme_set(pn.theme_classic(base_size=18, base_family="Helvetica")) font = { "family": "sans-serif", "color": "black", "weight": "normal", "size": 16, } class Analysis(Bact): def __init__(self): """Standard __init__ method. Parameters ----------
### Setup import pandas as pd import numpy as np import plotnine as p9 from sklearn.decomposition import PCA from sklearn.cluster import KMeans import itertools import seaborn as sns import matplotlib.pyplot as plt from dimension_reduction_fx import plot_pca_vis, pca_df, pca_plot, sort_df #import umap #Not used in final version #Define default plot theme: p9.theme_set(p9.theme_classic()) #Read in data: jobs_df = pd.read_csv("./data/jobs_df_clean.csv") skills_summary_df = pd.read_csv("./data/skills_summary_df.csv") #Clean jobs data #jobs_df.columns #Inspect columns jobs_df = jobs_df.fillna(value="None") jobs_df['title'] = jobs_df['title'].str.lower() #Assign 'type' based on simple rules: jobs_df['type'] = str(0) jobs_df.loc[jobs_df['title'].str.contains('analyst'), 'type'] = 'analyst' jobs_df.loc[jobs_df['title'].str.contains('engineer'), 'type'] = 'engineer' jobs_df.loc[jobs_df['title'].str.contains('scientist'), 'type'] = 'scientist' jobs_df.loc[jobs_df['title'].str.contains('manager'), 'type'] = 'manager' jobs_df.loc[jobs_df['title'].str.contains('director'), 'type'] = 'manager'
def make_plots(leak_df, time_df, site_df, sim_n, spin_up, output_directory):
    """Make a set of standard plots to output at end of simulation.

    Parameters
    ----------
    leak_df: Leak-level dataframe with 'days_active', 'cum_frac_leaks',
        'cum_frac_rate', 'cum_rate', and 'status' columns.
    time_df: Daily timeseries with 'datetime', 'daily_emissions_kg', and
        'active_leaks' columns.
    site_df: Site-level dataframe with 'cum_frac_sites' and
        'cum_frac_emissions' columns.
    sim_n: Simulation identifier (string) appended to output file names.
    spin_up: Number of leading rows of time_df to drop from the plots.
    output_directory: Directory path where PNG files are written.

    Returns
    -------
    None; writes six PNG files to output_directory.
    """
    # Temporarily mute warnings
    warnings.filterwarnings('ignore')
    pn.theme_set(pn.theme_linedraw())

    def _grid_theme():
        # Shared panel-border / grid styling; previously duplicated verbatim
        # in all six plots below.
        return pn.theme(
            panel_border=pn.element_rect(colour="black", fill=None, size=2),
            panel_grid_minor_x=pn.element_blank(),
            panel_grid_major_x=pn.element_blank(),
            panel_grid_minor_y=pn.element_line(
                colour='black', linewidth=0.5, alpha=0.3),
            panel_grid_major_y=pn.element_line(
                colour='black', linewidth=1, alpha=0.5))

    # Chop off spin-up year (only for plots, still exists in raw output)
    time_df_adj = time_df.iloc[spin_up:, ]

    # Timeseries plots
    plot_time_1 = (
        pn.ggplot(time_df_adj, pn.aes('datetime', 'daily_emissions_kg')) +
        pn.geom_line(size=2) +
        pn.ggtitle('Daily emissions from all sites (kg)') +
        pn.ylab('') +
        pn.xlab('') +
        pn.scale_x_datetime(labels=date_format('%Y')) +
        _grid_theme())
    plot_time_1.save(output_directory + '/plot_time_emissions_' + sim_n + '.png',
                     width=10, height=3, dpi=300)

    plot_time_2 = (
        pn.ggplot(time_df_adj, pn.aes('datetime', 'active_leaks')) +
        pn.geom_line(size=2) +
        pn.ggtitle('Number of active leaks at all sites') +
        pn.ylab('') +
        pn.xlab('') +
        pn.scale_x_datetime(labels=date_format('%Y')) +
        _grid_theme())
    plot_time_2.save(output_directory + '/plot_time_active_' + sim_n + '.png',
                     width=10, height=3, dpi=300)

    # Site-level plots
    plot_site_1 = (
        pn.ggplot(site_df, pn.aes('cum_frac_sites', 'cum_frac_emissions')) +
        pn.geom_line(size=2) +
        _grid_theme() +
        pn.xlab('Cumulative fraction of sites') +
        pn.ylab('Cumulative fraction of emissions') +
        pn.ggtitle('Empirical cumulative distribution of site-level emissions'))
    plot_site_1.save(output_directory + '/site_cum_dist_' + sim_n + '.png',
                     width=5, height=4, dpi=300)

    # Leak plots
    plot_leak_1 = (
        pn.ggplot(leak_df, pn.aes('days_active')) +
        pn.geom_histogram(colour='gray') +
        _grid_theme() +
        pn.ggtitle('Distribution of leak duration') +
        pn.xlab('Number of days the leak was active') +
        pn.ylab('Count'))
    plot_leak_1.save(output_directory + '/leak_active_hist' + sim_n + '.png',
                     width=5, height=4, dpi=300)

    plot_leak_2 = (
        pn.ggplot(leak_df, pn.aes('cum_frac_leaks', 'cum_frac_rate', colour='status')) +
        pn.geom_line(size=2) +
        pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
        _grid_theme() +
        pn.xlab('Cumulative fraction of leak sources') +
        pn.ylab('Cumulative leak rate fraction') +
        pn.ggtitle('Fractional cumulative distribution'))
    plot_leak_2.save(output_directory + '/leak_cum_dist1_' + sim_n + '.png',
                     width=4, height=4, dpi=300)

    plot_leak_3 = (
        pn.ggplot(leak_df, pn.aes('cum_frac_leaks', 'cum_rate', colour='status')) +
        pn.geom_line(size=2) +
        pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
        _grid_theme() +
        pn.scale_y_continuous(trans='log10') +
        pn.xlab('Cumulative fraction of leak sources') +
        pn.ylab('Cumulative emissions (kg/day)') +
        pn.ggtitle('Absolute cumulative distribution'))
    plot_leak_3.save(output_directory + '/leak_cum_dist2_' + sim_n + '.png',
                     width=4, height=4, dpi=300)
    return
#!/usr/bin/env python # coding: utf-8 import pandas as pd import plotnine as p9 from pyprojroot import here p9.theme_set(p9.theme_minimal) print(f"plotnine=={p9.__version__}") df = pd.read_csv(here() / ".data" / "titanic.csv") df.head(3) # ## Univariate, Continuous Distribution # ### Histogram ( p9.ggplot(df[~df["age"].isna()], p9.aes(x="age")) + p9.geom_histogram(binwidth=5) + p9.ggtitle("Histogram") ) # ## ECDF ( p9.ggplot(df[~df["age"].isna()], p9.aes(x="age")) + p9.stat_ecdf()