def plot_predict(forecast):
    """Chart observed values against a forecast and save the chart as a PNG.

    Column ``y`` is drawn in blue (points and a line), ``yhat`` as a red
    line, and the band between ``yhat_lower`` and ``yhat_upper`` as a light
    blue ribbon, all against ``ds`` on a datetime x axis.  The figure is
    written to ``png/predict_pressure_chart.png`` inside the directory that
    holds this module.

    Args:
        forecast: data handed to ``ggplot``; the mappings reference the
            columns ``ds``, ``y``, ``yhat``, ``yhat_lower`` and
            ``yhat_upper``.

    Returns:
        The assembled ggplot object (the same one that was saved).
    """
    x_tick_text = element_text(angle=45, hjust=1, face='bold', color='black')
    y_tick_text = element_text(face='bold', colour='black')
    forecast_band = aes(ymin='yhat_lower', ymax='yhat_upper')

    # Output goes into a "png" folder next to this source file.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    target_dir = os.path.join(module_dir, 'png')

    chart = ggplot(data=forecast, mapping=aes(x='ds', y='y'))
    chart = chart + geom_point(colour='blue', alpha=0.3, na_rm=True)
    chart = chart + geom_line(colour='blue', na_rm=True)
    chart = chart + geom_line(data=forecast,
                              mapping=aes(x='ds', y='yhat'),
                              colour='red')
    chart = chart + geom_ribbon(data=forecast, mapping=forecast_band,
                                fill='blue', alpha=0.1)
    chart = chart + scale_x_datetime(breaks='1 days',
                                     date_labels='%y-%m-%d %H:%M')
    chart = chart + xlab('Time')
    chart = chart + ylab('Pressure')
    chart = chart + theme_bw()
    chart = chart + theme(axis_text_x=x_tick_text, axis_text_y=y_tick_text)

    chart.save(filename='predict_pressure_chart.png',
               path=target_dir,
               width=8,
               height=6,
               units='in',
               dpi=326,
               verbose=False)
    return chart
def cum_regret_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot cumulative regret per agent against the timestep.

    The loaded results are grouped by ``t`` and ``agent``; for each group the
    mean of ``cum_regret`` is drawn as a line and the values returned by
    ``lower_interval`` / ``upper_interval`` bound a translucent ribbon.

    Reference: https://web.stanford.edu/~bvr/pubs/TS_Tutorial.pdf

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      A dict with a single entry, ``experiment_name + '_cum_regret'``,
      mapping to the plot.
    """
    raw_results = load_data(experiment_name, data_path)

    statistics = [np.mean, lower_interval, upper_interval]
    grouped = raw_results.groupby(['t', 'agent'])
    summary = grouped.agg({'cum_regret': statistics}).reset_index()

    # Collapse the two-level column labels into single strings.  The group
    # keys have an empty second level, which is why they become 't_' and
    # 'agent_' below.
    summary.columns = ['_'.join(levels) for levels in summary.columns.values]

    interval_band = gg.aes(ymin='cum_regret_lower_interval',
                           ymax='cum_regret_upper_interval',
                           fill='agent_')

    chart = gg.ggplot(summary)
    chart = chart + gg.aes('t_', 'cum_regret_mean', colour='agent_')
    chart = chart + gg.geom_line(size=1.25, alpha=0.75)
    chart = chart + gg.geom_ribbon(interval_band, alpha=0.1)
    chart = chart + gg.xlab('time period (t)')
    chart = chart + gg.ylab('cumulative regret')
    chart = chart + gg.scale_colour_brewer(name='agent_', type='qual',
                                           palette='Set1')

    return {experiment_name + '_cum_regret': chart}
def test_ribbon_facetting():
    """Ribbons filled by ``factor(z)`` with one facet per value of ``z``.

    The themed plot is compared with the reference named
    ``'ribbon_facetting'``.
    """
    mapping = aes('x', ymin='ymin', ymax='ymax', fill='factor(z)')

    p = ggplot(df, mapping)
    p = p + geom_ribbon()
    p = p + facet_wrap('~ z')

    assert p + _theme == 'ribbon_facetting'
def test_ribbon_aesthetics():
    """Six ribbons side by side, each exercising a different aesthetic.

    Every ribbon is shifted by a further multiple of ``width`` along x and
    maps one of alpha, linetype, colour, fill or size (the first ribbon uses
    the defaults).  The themed plot is compared with the reference named
    ``'ribbon_aesthetics'``.
    """
    p = ggplot(df, aes('x', ymin='ymin', ymax='ymax', group='factor(z)'))

    # One layer per aesthetic under test.
    p = p + geom_ribbon()
    p = p + geom_ribbon(aes('x+width', alpha='z'))
    p = p + geom_ribbon(aes('x+2*width', linetype='factor(z)'),
                        color='black', fill=None, size=2)
    p = p + geom_ribbon(aes('x+3*width', color='z'), fill=None, size=2)
    p = p + geom_ribbon(aes('x+4*width', fill='factor(z)'))
    p = p + geom_ribbon(aes('x+5*width', size='z'),
                        color='black', fill=None)

    # Ticks at multiples of 2*pi, labelled 0, 2pi, 4pi, ... 12pi.
    p = p + scale_x_continuous(
        breaks=[i * 2 * np.pi for i in range(7)],
        labels=['0'] + [r'${}\pi$'.format(2 * i) for i in range(1, 7)])

    assert p + _theme == 'ribbon_aesthetics'
def plot_arima(df):
    """Chart the second and third columns of ``df`` over time and save a PNG.

    Columns are picked by position: the second column (index 1) is drawn in
    blue and the third (index 2) in red, each as points plus a line against
    ``Timestamp``.  A dashed green vertical line marks the latest
    ``Timestamp`` among rows where neither ``Timestamp`` nor the second
    column is missing, and a light red ribbon spans ``Lower`` to ``Upper``.
    The figure is written to ``png/<second column name>_predict.png`` inside
    the directory that holds this module.

    Note that ``df['Timestamp']`` is converted to datetimes in place, so the
    caller's frame is modified.

    Args:
        df: frame with a ``Timestamp`` column, ``Lower`` and ``Upper``
            columns, and the two plotted series at column positions 1 and 2.

    Returns:
        The assembled ggplot object (the same one that was saved).
    """
    df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    x_tick_text = element_text(angle=45, hjust=1, face='bold', color='black')
    y_tick_text = element_text(face='bold', colour='black')

    chart = ggplot(data=df,
                   mapping=aes(x='Timestamp', y=df.columns.values[1]))
    chart = chart + geom_point(colour='blue', alpha=0.3, na_rm=True)
    chart = chart + geom_line(colour='blue', na_rm=True)
    chart = chart + geom_point(
        mapping=aes(x='Timestamp', y=df.columns.values[2]),
        colour='red', alpha=0.3, na_rm=True)
    chart = chart + geom_line(
        mapping=aes(x='Timestamp', y=df.columns.values[2]),
        colour='red', na_rm=True)

    # Last timestamp that still has a value in the second column.
    complete_rows = df[['Timestamp', df.columns.values[1]]].dropna(axis=0)
    chart = chart + geom_vline(xintercept=max(complete_rows['Timestamp']),
                               color='green', linetype='dashed')

    chart = chart + geom_ribbon(data=df,
                                mapping=aes(ymin='Lower', ymax='Upper'),
                                fill='red', alpha=0.1)
    chart = chart + scale_x_datetime(breaks='1 days',
                                     date_labels='%y-%m-%d %H:%M')
    chart = chart + xlab('Time')
    chart = chart + ylab(df.columns.values[1])
    chart = chart + theme_bw()
    chart = chart + theme(axis_text_x=x_tick_text, axis_text_y=y_tick_text)

    # Output goes into a "png" folder next to this source file.
    module_dir = os.path.abspath(os.path.dirname(__file__))
    chart.save(filename=df.columns.values[1] + '_predict.png',
               path=os.path.join(module_dir, 'png'),
               width=8,
               height=6,
               units='in',
               dpi=326,
               verbose=False)
    return chart
def test_ribbon_aesthetics():
    """Stack six shifted ribbons, each varying one aesthetic.

    The layers map, in order: nothing extra, alpha, linetype, colour, fill
    and size.  The x axis is ticked at multiples of 2*pi.  The themed plot is
    compared with the reference named ``'ribbon_aesthetics'``.
    """
    tick_positions = [i*2*np.pi for i in range(7)]
    tick_labels = ['0'] + [r'${}\pi$'.format(2*i) for i in range(1, 7)]

    base_mapping = aes('x', ymin='ymin', ymax='ymax', group='factor(z)')
    p = (
        ggplot(df, base_mapping)
        + geom_ribbon()
        + geom_ribbon(aes('x+width', alpha='z'))
        + geom_ribbon(
            aes('x+2*width', linetype='factor(z)'),
            color='black',
            fill=None,
            size=2,
        )
        + geom_ribbon(aes('x+3*width', color='z'), fill=None, size=2)
        + geom_ribbon(aes('x+4*width', fill='factor(z)'))
        + geom_ribbon(aes('x+5*width', size='z'), color='black', fill=None)
        + scale_x_continuous(breaks=tick_positions, labels=tick_labels)
    )

    assert p + _theme == 'ribbon_aesthetics'
# Join every result frame with the per-run parameters (matched on
# 'unique_id'), then summarise 'avg_reward' per (agent, t): its mean plus the
# values returned by lower_interval / upper_interval.
params_df = config_lib.get_params_df(config)
df = pd.merge(pd.concat(results), params_df, on='unique_id')
plt_df = (df.groupby(['agent', 't']).agg({
    'avg_reward': [np.mean, lower_interval, upper_interval]
}).reset_index())
# Flatten the two-level column labels into single strings.  The group keys
# have an empty second level, so they come out as 'agent_' and 't_'.
plt_df.columns = ['_'.join(i) for i in plt_df.columns.values]

#############################################################################
# Plotting and analysis (uses plotnine by default)
gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
gg.theme_update(figure_size=(12, 8))

# Mean reward per agent as a line, with the interval drawn as a ribbon.  The
# second gg.aes(...) adds ymin/ymax/fill to the plot-level mapping.
p = (gg.ggplot(plt_df)
     + gg.aes('t_', 'avg_reward_mean', colour='agent_')
     + gg.geom_line()
     + gg.aes(ymin='avg_reward_lower_interval',
              ymax='avg_reward_upper_interval', fill='agent_')
     + gg.geom_ribbon(alpha=0.1))
print(p)

#############################################################################
# Collating data with Pandas
# Same join as above, this time reduced to the mean 'num_query' per
# (agent, t).  A single aggregation function keeps the columns flat, so no
# renaming is needed and the plot below uses 't' / 'agent' directly.
params_df = config_lib.get_params_df(config)
df = pd.merge(pd.concat(results), params_df, on='unique_id')
plt_df = (df.groupby(['agent', 't']).agg({'num_query': np.mean}).reset_index())

#############################################################################
# Plotting and analysis (uses plotnine by default)
gg.theme_set(gg.theme_bw(base_size=16, base_family='serif'))
gg.theme_update(figure_size=(12, 8))

# NOTE(review): unlike the first plot, this one is only assigned to `p` here
# and not printed within the visible code.
p = (gg.ggplot(plt_df)
     + gg.aes('t', 'num_query', colour='agent')
     + gg.geom_line())
def plot_predictions_actual(pred_df, figsize):
    """Scatter predictions against actual values with an interval ribbon.

    ``y`` goes on the x axis and ``pred`` on the y axis.  A translucent
    ribbon spans ``lb`` to ``ub`` and a line with slope 1 through the origin
    marks where prediction equals actual.

    Args:
        pred_df: data handed to ``ggplot``; the mappings reference the
            columns ``y``, ``pred``, ``lb`` and ``ub``.
        figsize: value passed to the theme's ``figure_size``.

    Returns:
        The assembled ggplot object.
    """
    chart = pn.ggplot(pred_df, pn.aes(x='y', y='pred'))
    chart = chart + pn.geom_point()
    chart = chart + pn.geom_ribbon(pn.aes(ymin='lb', ymax='ub'), alpha=0.3)
    chart = chart + pn.geom_abline(slope=1, intercept=0)
    chart = chart + pn.theme_bw()
    chart = chart + pn.theme(figure_size=figsize)
    return chart
na_rm=True, alpha=0.2) g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group'))) g += p9.scale_colour_manual(values=ez_colors(g.n_groups('group'))) elif geom == 'ribbon': g = EZPlot(gdata.dropna()) # set groups if group is None: g += p9.geom_ribbon(p9.aes(x="x", y='center', ymin='low', ymax='high'), fill=ez_colors(1)[0], alpha=0.2, na_rm=False) g += p9.geom_line(p9.aes(x="x", y='center'), colour=ez_colors(1)[0], na_rm=False) else: g += p9.geom_ribbon( p9.aes(x="x", y='center', ymin='low', ymax='high', group="group", fill="group"), na_rm=True,
def line_plot(df,
              x,
              y,
              group=None,
              facet_x=None,
              facet_y=None,
              aggfun='sum',
              err=None,
              show_points=False,
              base_size=10,
              figure_size=(6, 3)):
    '''
    Aggregate ``df`` and draw one line per y column or per group.

    When several y expressions are given they are aggregated together and
    melted into a single ``y`` column, with the originating expression
    stored as the group (legend title "Variable", axis title "Value").
    With a single y expression an optional ``err`` expression adds a shaded
    band of ``y +/- err``.

    Parameters
    ----------
    df : pd.DataFrame
      input dataframe (a copy is aggregated; the input is not modified)
    x : str
      quoted expression for the x axis; ``'.index'`` selects the index
    y : str or list of str
      quoted expression(s) for the y axis
    group : str
      quoted expression used as group (ie color); single y only
    facet_x : str
      quoted expression used as facet
    facet_y : str
      quoted expression used as facet (only applied together with facet_x)
    aggfun : str or fun
      aggregation passed to ``agg_data`` (eg sum, mean, median ...)
    err : str
      quoted expression used as error shaded area; single y only
    show_points : bool
      show/hide markers
    base_size : int
      base size for theme_ez
    figure_size : tuple of int
      figure size

    Returns
    -------
    g : EZPlot
      EZplot object

    Raises
    ------
    ValueError
      if ``group`` or ``err`` is given together with more than one y column

    '''

    # group / err only make sense for a single y expression
    several_ys = isinstance(y, list) and len(y) > 1
    if several_ys and group is not None:
        message = "groups can be specified only when a single y column is present"
        log.error(message)
        raise ValueError(message)
    if several_ys and err is not None:
        message = "err can be specified only when a single y column is present"
        log.error(message)
        raise ValueError(message)

    # a one-element list is treated exactly like a plain expression
    if isinstance(y, list) and len(y) == 1:
        y = y[0]

    # work on a copy of the data
    dataframe = df.copy()

    # split every grouping expression into (display name, expression)
    names = {}
    groups = {}
    variables = {}
    grouping_args = (('x', x),
                     ('group', group),
                     ('facet_x', facet_x),
                     ('facet_y', facet_y))
    for label, var in grouping_args:
        names[label], groups[label] = unname(var)

    # special case: plot against the dataframe index
    if x == '.index':
        groups['x'] = '.index'
        index_name = dataframe.index.name
        names['x'] = index_name if index_name is not None else ''

    if isinstance(y, list):
        # several y expressions: aggregate them side by side, then melt
        ys = []
        for i, var in enumerate(y):
            key = 'y_{}'.format(i)
            ys.append(key)
            names[key], variables[key] = unname(var)

        wide_gdata = agg_data(dataframe,
                              variables,
                              groups,
                              aggfun,
                              fill_groups=True)
        id_columns = [
            c for c in ['x', 'facet_x', 'facet_y'] if c in wide_gdata.columns
        ]
        gdata = pd.melt(wide_gdata,
                        id_columns,
                        var_name='group',
                        value_name='y')
        # show the original expression names instead of y_0, y_1, ...
        display_names = {var: names[var] for var in ys}
        gdata['group'] = gdata['group'].replace(display_names)

        # labels used further down for axis / legend titles
        names['y'] = 'Value'
        names['group'] = 'Variable'
        group = 'Variable'
    else:
        names['y'], variables['y'] = unname(y)
        if err is not None:
            names['err'], variables['err'] = unname(err)

        gdata = agg_data(dataframe,
                         variables,
                         groups,
                         aggfun,
                         fill_groups=True)

    # keep only the known columns, in a fixed order
    column_order = ['x', 'y', 'err', 'group', 'facet_x', 'facet_y']
    gdata = gdata[[c for c in column_order if c in gdata.columns]]
    if err is not None:
        gdata['ymax'] = gdata['y'] + gdata['err']
        gdata['ymin'] = gdata['y'] - gdata['err']

    # plot object
    g = EZPlot(gdata)

    if group is None:
        # single series: fixed colour, everything in one group
        g += p9.geom_line(p9.aes(x="x", y="y"),
                          group=1,
                          colour=ez_colors(1)[0])
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y"),
                               group=1,
                               colour=ez_colors(1)[0])
        if err is not None:
            g += p9.geom_ribbon(p9.aes(x="x", ymax="ymax", ymin="ymin"),
                                group=1,
                                fill=ez_colors(1)[0],
                                alpha=0.2)
    else:
        # one colour per group
        grouped_line = p9.aes(x="x",
                              y="y",
                              group="factor(group)",
                              colour="factor(group)")
        g += p9.geom_line(grouped_line)
        if show_points:
            g += p9.geom_point(p9.aes(x="x", y="y", colour="factor(group)"))
        if err is not None:
            grouped_band = p9.aes(x="x",
                                  ymax="ymax",
                                  ymin="ymin",
                                  fill="factor(group)")
            g += p9.geom_ribbon(grouped_band, alpha=0.2)
        g += p9.scale_color_manual(values=ez_colors(g.n_groups('group')))
        g += p9.scale_fill_manual(values=ez_colors(g.n_groups('group')))

    # facets: wrap on facet_x alone, grid when both are given
    if facet_x is not None:
        if facet_y is None:
            g += p9.facet_wrap('~facet_x')
        else:
            g += p9.facet_grid('facet_y~facet_x')

    # x scale depends on the type of the x column
    if g.column_is_timestamp('x'):
        g += p9.scale_x_datetime()
    elif g.column_is_categorical('x'):
        g += p9.scale_x_discrete()
    else:
        g += p9.scale_x_continuous(labels=ez_labels)

    # y scale
    g += p9.scale_y_continuous(labels=ez_labels)

    # axis labels
    x_axis_label = p9.xlab(names['x'])
    y_axis_label = p9.ylab(names['y'])
    g += x_axis_label + y_axis_label

    # theme
    legend_title = p9.element_text(text=names['group'], size=base_size)
    g += theme_ez(figure_size=figure_size,
                  base_size=base_size,
                  legend_title=legend_title)

    return g
def batch_plots(self):
    """Summarise batch results per program and write comparison CSVs and plots.

    For each of ``self.active_leak_dfs``, ``self.emission_dfs`` and
    ``self.cost_dfs`` the frames are extended IN PLACE with per-row ``mean``,
    ``std``, ``low`` (2.5 % quantile), ``high`` (97.5 % quantile) and
    ``program`` columns; the frame belonging to ``self.ref_program`` is moved
    to the front of the list (the list itself is reordered in place).

    Files written below ``self.output_directory`` (joined by plain string
    concatenation, so it must already end with a path separator):

    * ``mean_active_leaks.csv``, ``mean_emissions.csv``,
      ``rolling_cost_estimates.csv`` - melted long-format summaries
    * ``program_comparison.png`` - emissions per program over time
    * ``relative_mitigation.png`` / ``relative_mitigation2.png`` -
      difference and ratio of every other program versus the reference
    * ``cost_estimate_temporal.png`` - cost per program over time
    * ``cost_comparison.csv`` / ``cost_comparison.png`` - mean annual cost
      per site, split by method

    Also reads ``self.directories``, ``self.dates_trunc``, ``self.all_data``
    and ``self.n_sites``.  The relative-mitigation section indexes
    ``dfs[1]``, so at least two programs are required.

    As in the original code, ``pd.options.mode.chained_assignment`` is set to
    ``None`` (a process-wide pandas option) and the plotnine default theme is
    set to ``theme_linedraw``.

    Returns:
        None
    """
    summary_id_vars = ['datetime', 'mean', 'std', 'low', 'high', 'program']

    def panel_theme():
        # Border/grid styling shared by every figure in this method.
        return pn.theme(
            panel_border=pn.element_rect(colour="black", fill=None, size=2),
            panel_grid_minor_x=pn.element_blank(),
            panel_grid_major_x=pn.element_blank(),
            panel_grid_minor_y=pn.element_line(
                colour='black', linewidth=0.5, alpha=0.3),
            panel_grid_major_y=pn.element_line(
                colour='black', linewidth=1, alpha=0.5))

    # First, put together active leak data and output for live plotting
    # functionality (no AL plot here currently)
    dfs = self.active_leak_dfs
    for i, run_df in enumerate(dfs):
        # n_cols is fixed before the summary columns are appended, so every
        # statistic is computed over the original columns only.
        n_cols = run_df.shape[1]
        run_df['mean'] = run_df.iloc[:, 0:n_cols].mean(axis=1)
        run_df['std'] = run_df.iloc[:, 0:n_cols].std(axis=1)
        run_df['low'] = run_df.iloc[:, 0:n_cols].quantile(0.025, axis=1)
        run_df['high'] = run_df.iloc[:, 0:n_cols].quantile(0.975, axis=1)
        run_df['program'] = self.directories[i]

    # Move reference program to the top of the list
    for i, df in enumerate(dfs):
        if df['program'].iloc[0] == self.ref_program:
            dfs.insert(0, dfs.pop(i))

    # Reshape each frame to long format and stack them for plotting.
    # pd.concat replaces DataFrame.append, which no longer exists in
    # pandas >= 2.0.
    dfs_p1 = [pd.melt(run_df, id_vars=summary_id_vars) for run_df in dfs]
    df_p1 = pd.concat(dfs_p1, ignore_index=True)

    # Output active-leak df for other uses (e.g. live plot)
    df_p1.to_csv(self.output_directory + 'mean_active_leaks.csv', index=True)

    # Now repeat for emissions (which will actually be used for batch plotting)
    dfs = self.emission_dfs
    for i, run_df in enumerate(dfs):
        n_cols = run_df.shape[1]
        run_df['mean'] = run_df.iloc[:, 0:n_cols].mean(axis=1)
        run_df['std'] = run_df.iloc[:, 0:n_cols].std(axis=1)
        run_df['low'] = run_df.iloc[:, 0:n_cols].quantile(0.025, axis=1)
        run_df['high'] = run_df.iloc[:, 0:n_cols].quantile(0.975, axis=1)
        run_df['program'] = self.directories[i]

    # Move reference program to the top of the list
    for i, df in enumerate(dfs):
        if df['program'].iloc[0] == self.ref_program:
            dfs.insert(0, dfs.pop(i))

    dfs_p1 = [pd.melt(run_df, id_vars=summary_id_vars) for run_df in dfs]
    df_p1 = pd.concat(dfs_p1, ignore_index=True)

    # Output Emissions df for other uses (e.g. live plot)
    df_p1.to_csv(self.output_directory + 'mean_emissions.csv', index=True)

    # Make plots from list of dataframes - one entry per dataframe
    pn.theme_set(pn.theme_linedraw())
    plot1 = (pn.ggplot(None) + pn.aes('datetime', 'value', group='program') +
             pn.geom_ribbon(df_p1, pn.aes(ymin='low', ymax='high', fill='program'),
                            alpha=0.2) +
             pn.geom_line(df_p1, pn.aes('datetime', 'mean', colour='program'),
                          size=1) +
             pn.ylab('Daily emissions (kg/site)') + pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.scale_y_continuous(trans='log10') +
             pn.ggtitle('To reduce uncertainty, use more simulations.') +
             pn.labs(color='Program', fill='Program') +
             panel_theme()
             )
    plot1.save(self.output_directory + 'program_comparison.png',
               width=7, height=3, dpi=900)

    # Build relative mitigation plots: compare every non-reference program
    # with the reference (first entry), row by row and by position.
    dfs_p2 = dfs.copy()
    ref_df = dfs_p2[0]
    for alt_df in dfs_p2[1:]:
        # Float placeholders: the columns are filled with floats below, so
        # starting from 0.0 avoids an int -> float upcast on assignment.
        alt_df['mean_dif'] = 0.0
        alt_df['std_dif'] = 0.0
        alt_df['mean_ratio'] = 0.0
        alt_df['std_ratio'] = 0.0
        for j in range(len(alt_df)):
            ref_mean = ref_df.loc[ref_df.index[j], 'mean']
            ref_std = ref_df.loc[ref_df.index[j], 'std']
            alt_mean = alt_df.loc[alt_df.index[j], 'mean']
            alt_std = alt_df.loc[alt_df.index[j], 'std']

            alt_df.loc[alt_df.index[j], 'mean_dif'] = alt_mean - ref_mean
            # Standard deviations combined in quadrature.
            alt_df.loc[alt_df.index[j], 'std_dif'] = math.sqrt(
                math.pow(alt_std, 2) + math.pow(ref_std, 2))
            alt_df.loc[alt_df.index[j], 'mean_ratio'] = alt_mean / ref_mean
            # Relative standard deviations combined in quadrature.
            alt_df.loc[alt_df.index[j], 'std_ratio'] = math.sqrt(
                math.pow((alt_std / alt_mean), 2) +
                math.pow((ref_std / ref_mean), 2))

    # Build plotting dataframe, seeded with the first non-reference program
    first_alt = dfs_p2[1]
    df_p2 = self.dates_trunc.copy().to_frame()
    df_p2['program'] = first_alt['program']
    df_p2['mean_dif'] = first_alt['mean_dif']
    df_p2['std_dif'] = first_alt['std_dif']
    df_p2['mean_ratio'] = first_alt['mean_ratio']
    df_p2['std_ratio'] = first_alt['std_ratio']
    df_p2['low_dif'] = first_alt['mean_dif'] - 2 * first_alt['std_dif']
    df_p2['high_dif'] = first_alt['mean_dif'] + 2 * first_alt['std_dif']
    # NOTE(review): the lower ratio bound is mean / (mean + 2 * std), which
    # is not symmetric with high_ratio = mean + 2 * std.  Kept as in the
    # original - confirm this is the intended formula.
    df_p2['low_ratio'] = first_alt['mean_ratio'] / (
        first_alt['mean_ratio'] + 2 * first_alt['std_ratio'])
    df_p2['high_ratio'] = first_alt['mean_ratio'] + 2 * first_alt['std_ratio']

    # Silences the chained-assignment warning raised when 'datetime' is
    # added to the column subset below.  This is a process-wide setting.
    pd.options.mode.chained_assignment = None
    for alt_df in dfs_p2[2:]:
        alt_df['low_dif'] = alt_df['mean_dif'] - 2 * alt_df['std_dif']
        alt_df['high_dif'] = alt_df['mean_dif'] + 2 * alt_df['std_dif']
        alt_df['low_ratio'] = alt_df['mean_ratio'] / (
            alt_df['mean_ratio'] + 2 * alt_df['std_ratio'])
        alt_df['high_ratio'] = alt_df['mean_ratio'] + 2 * alt_df['std_ratio']
        short_df = alt_df[['program', 'mean_dif', 'std_dif', 'low_dif',
                           'high_dif', 'mean_ratio', 'std_ratio',
                           'low_ratio', 'high_ratio']]
        short_df['datetime'] = np.array(self.dates_trunc)
        df_p2 = pd.concat([df_p2, short_df], ignore_index=True)

    # Make plot 2: absolute difference versus the reference program
    plot2 = (pn.ggplot(None) + pn.aes('datetime', 'mean_dif', group='program') +
             pn.geom_ribbon(
                 df_p2, pn.aes(ymin='low_dif', ymax='high_dif', fill='program'),
                 alpha=0.2) +
             pn.geom_line(df_p2, pn.aes('datetime', 'mean_dif', colour='program'),
                          size=1) +
             pn.ylab('Daily emissions difference (kg/site)') + pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.ggtitle('Daily differences may be uncertain for small sample sizes') +
             pn.labs(color='Program', fill='Program') +
             panel_theme()
             )
    plot2.save(self.output_directory + 'relative_mitigation.png',
               width=7, height=3, dpi=900)

    # Make plot 3: ratio versus the reference program.  The adjacent string
    # literals are concatenated, so the second one ends with a space to keep
    # "emissions over" from running together.
    plot3 = (pn.ggplot(None) + pn.aes('datetime', 'mean_ratio', group='program') +
             pn.geom_ribbon(df_p2, pn.aes(
                 ymin='low_ratio', ymax='high_ratio', fill='program'), alpha=0.2) +
             pn.geom_hline(yintercept=1, size=0.5, colour='blue') +
             pn.geom_line(df_p2, pn.aes('datetime', 'mean_ratio', colour='program'),
                          size=1) +
             pn.ylab('Emissions ratio') + pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.ggtitle(
                 'Blue line represents equivalence. \nIf uncertainty is high, use more '
                 'simulations and/or sites. \nLook also at ratio of mean daily emissions '
                 'over entire timeseries.') +
             pn.labs(color='Program', fill='Program') +
             panel_theme()
             )
    plot3.save(self.output_directory + 'relative_mitigation2.png',
               width=7, height=3, dpi=900)

    # ---------------------------------------
    # ------ Figure to compare costs ------
    dfs = self.cost_dfs
    for i, run_df in enumerate(dfs):
        n_cols = run_df.shape[1]
        run_df['mean'] = run_df.iloc[:, 0:n_cols].mean(axis=1)
        run_df['std'] = run_df.iloc[:, 0:n_cols].std(axis=1)
        run_df['low'] = run_df.iloc[:, 0:n_cols].quantile(0.025, axis=1)
        run_df['high'] = run_df.iloc[:, 0:n_cols].quantile(0.975, axis=1)
        run_df['program'] = self.directories[i]

    # Move reference program to the top of the list
    for i, df in enumerate(dfs):
        if df['program'].iloc[0] == self.ref_program:
            dfs.insert(0, dfs.pop(i))

    dfs_p1 = [pd.melt(run_df, id_vars=summary_id_vars) for run_df in dfs]
    df_p1 = pd.concat(dfs_p1, ignore_index=True)

    # Output cost df for other uses (e.g. live plot)
    df_p1.to_csv(self.output_directory + 'rolling_cost_estimates.csv', index=True)

    # Make plots from list of dataframes - one entry per dataframe
    pn.theme_set(pn.theme_linedraw())
    plot1 = (pn.ggplot(None) + pn.aes('datetime', 'value', group='program') +
             pn.geom_ribbon(df_p1, pn.aes(ymin='low', ymax='high', fill='program'),
                            alpha=0.2) +
             pn.geom_line(df_p1, pn.aes('datetime', 'mean', colour='program'),
                          size=1) +
             pn.ylab('Estimated cost per facility') + pn.xlab('') +
             pn.scale_colour_hue(h=0.15, l=0.25, s=0.9) +
             pn.scale_x_datetime(labels=date_format('%Y')) +
             pn.labs(color='Program', fill='Program') +
             panel_theme()
             )
    plot1.save(self.output_directory + 'cost_estimate_temporal.png',
               width=7, height=3, dpi=900)

    ########################################
    # Cost breakdown by program and method
    # The method names are the "*cost" columns of each program's first
    # timeseries output, without the total.
    method_lists = []
    for directory in self.directories:
        df = pd.read_csv(
            self.output_directory + directory + "/timeseries_output_0.csv")
        df = df.filter(regex='cost$', axis=1)
        df = df.drop(columns=["total_daily_cost"])
        method_lists.append(list(df))

    # costs[program][simulation][method]: the mean cost per timestep and
    # site, multiplied by 365.
    costs = [[] for _ in range(len(self.all_data))]
    for i in range(len(self.all_data)):
        for j in range(len(self.all_data[i])):
            simcosts = []
            for method in method_lists[i]:
                timesteps = len(self.all_data[i][j][method])
                simcosts.append(
                    (sum(self.all_data[i][j][method]) / timesteps / self.n_sites) * 365)
            costs[i].append(simcosts)

    rows_list = []
    for i, program_costs in enumerate(costs):
        df_temp = pd.DataFrame(program_costs)
        for j in range(len(df_temp.columns)):
            # Named `row` rather than `dict` so the builtin is not shadowed.
            row = {
                'Program': self.directories[i],
                'Mean Cost': round(df_temp.iloc[:, j].mean()),
                'St. Dev.': df_temp.iloc[:, j].std(),
                'Method': method_lists[i][j].replace('_cost', ''),
            }
            rows_list.append(row)
    df = pd.DataFrame(rows_list)

    # Output cost comparison df for other uses
    df.to_csv(self.output_directory + 'cost_comparison.csv', index=True)

    plot = (
        pn.ggplot(
            df, pn.aes(
                x='Program', y='Mean Cost', fill='Method', label='Mean Cost')) +
        pn.geom_bar(stat="identity") +
        pn.ylab('Cost per Site per Year') + pn.xlab('Program') +
        pn.scale_fill_hue(h=0.15, l=0.25, s=0.9) +
        pn.geom_text(size=15, position=pn.position_stack(vjust=0.5)) +
        panel_theme())
    plot.save(self.output_directory + 'cost_comparison.png',
              width=7, height=3, dpi=900)

    return
def plot(df: 'DataFrame',
         group_colname: Optional[str] = None,
         time_colname: Optional[str] = None,
         max_num_groups: int = 1,
         split_dt: Optional[np.datetime64] = None,
         **kwargs) -> 'ggplot':
    """
    :param df: The output of `.to_dataframe()`.
    :param group_colname: The name of the group-column. Defaults to 'group', which must then be a column of `df`.
    :param time_colname: The name of the time-column. Defaults to 'time', which must then be a column of `df`.
    :param max_num_groups: Max. number of groups to plot; if the number of groups in the dataframe is greater than
     this, a random subset will be taken.
    :param split_dt: If supplied, will draw a vertical line at this date (useful for showing pre/post validation).
    :param kwargs: Further keyword arguments to pass to `plotnine.theme` (e.g. `figure_size=(x,y)`)
    :return: A plot of the predicted and actual values.
    :raises TypeError: If a column name is omitted and the corresponding default column is not in `df`.
    :raises ValueError: If `df` holds components for more than one group and more than one process.
    """
    # Resolve and validate the column names before importing plotnine, so a
    # bad call fails fast with the intended message.
    if group_colname is None:
        group_colname = 'group'
        if group_colname not in df.columns:
            raise TypeError("Please specify group_colname")
    if time_colname is None:
        time_colname = 'time'
        if time_colname not in df.columns:
            raise TypeError("Please specify time_colname")

    from plotnine import (
        ggplot, aes, geom_line, geom_ribbon, facet_grid, facet_wrap, theme_bw, theme, ylab, geom_vline
    )

    # A components dataframe carries one row per process/state-element
    # instead of one per measure.
    is_components = ('process' in df.columns and 'state_element' in df.columns)

    df = df.copy()
    if df[group_colname].nunique() > max_num_groups:
        subset_groups = df[group_colname].drop_duplicates().sample(max_num_groups).tolist()
        if len(subset_groups) < df[group_colname].nunique():
            print("Subsetting to groups: {}".format(subset_groups))
        df = df.loc[df[group_colname].isin(subset_groups), :]
    num_groups = df[group_colname].nunique()

    aes_kwargs = {'x': time_colname}
    if is_components:
        aes_kwargs['group'] = 'state_element'

    plot = (
            ggplot(df, aes(**aes_kwargs)) +
            geom_line(aes(y='mean'), color='#4C6FE7', size=1.5, alpha=.75) +
            geom_ribbon(aes(ymin='lower', ymax='upper'), color=None, alpha=.25) +
            ylab("")
    )

    if is_components:
        num_processes = df['process'].nunique()
        if num_groups > 1 and num_processes > 1:
            raise ValueError("Cannot plot components for > 1 group and > 1 processes.")
        elif num_groups == 1:
            plot = plot + facet_wrap("~ measure + process", scales='free_y', labeller='label_both')
            if 'figure_size' not in kwargs:
                # Size the figure by the number of rows facet_wrap will use.
                from plotnine.facets.facet_wrap import n2mfrow
                nrow, _ = n2mfrow(len(df[['process', 'measure']].drop_duplicates().index))
                kwargs['figure_size'] = (12, nrow * 2.5)
        else:
            plot = plot + facet_grid(f"{group_colname} ~ measure", scales='free_y', labeller='label_both')
            if 'figure_size' not in kwargs:
                kwargs['figure_size'] = (12, num_groups * 2.5)

        # With at most one process per measure, colour the lines by
        # state-element.
        if (df.groupby('measure')['process'].nunique() <= 1).all():
            plot = plot + geom_line(aes(y='mean', color='state_element'), size=1.5)

    else:
        if 'actual' in df.columns:
            plot = plot + geom_line(aes(y='actual'))
        if num_groups > 1:
            plot = plot + facet_grid(f"{group_colname} ~ measure", scales='free_y', labeller='label_both')
        else:
            plot = plot + facet_wrap("~measure", scales='free_y', labeller='label_both')

        if 'figure_size' not in kwargs:
            kwargs['figure_size'] = (12, 5)

    # Explicit None check: whether a split date was supplied should not
    # depend on the truthiness of a datetime-like value.
    if split_dt is not None:
        plot = plot + geom_vline(xintercept=np.datetime64(split_dt), linetype='dashed')

    return plot + theme_bw() + theme(**kwargs)