def cum_regret_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Plot mean cumulative regret by agent, per timestep, with a ribbon.

    The ribbon spans the values produced by the `lower_interval` and
    `upper_interval` aggregations of `cum_regret` within each (t, agent)
    group.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name + '_cum_regret': ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # One row per (t, agent) with the mean and both interval aggregates.
    summary_stats = [np.mean, lower_interval, upper_interval]
    summary = (results.groupby(['t', 'agent'])
               .agg({'cum_regret': summary_stats})
               .reset_index())

    # Flatten the two-level column index by joining the levels with '_',
    # which is why the grouping keys are addressed as 't_' and 'agent_'.
    summary.columns = ['_'.join(levels) for levels in summary.columns.values]

    interval_band = gg.geom_ribbon(
        gg.aes(ymin='cum_regret_lower_interval',
               ymax='cum_regret_upper_interval',
               fill='agent_'),
        alpha=0.1)
    colour_scale = gg.scale_colour_brewer(
        name='agent_', type='qual', palette='Set1')

    plot = (gg.ggplot(summary)
            + gg.aes('t_', 'cum_regret_mean', colour='agent_')
            + gg.geom_line(size=1.25, alpha=0.75)
            + interval_band
            + gg.xlab('time period (t)')
            + gg.ylab('cumulative regret')
            + colour_scale)

    return {experiment_name + '_cum_regret': plot}
def misspecified_plot(experiment_name='finite_misspecified',
                      data_path=_DEFAULT_DATA_PATH):
    """Specialized plotting script for TS tutorial paper misspecified TS.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {'misspecified_regret': ggplot plot of mean instantaneous
        regret per agent and timestep,
        'misspecified_action': ggplot plot of the mean of each of the first
        three posterior-mean entries per agent and timestep}
    """
    mean_columns = ['mean_0', 'mean_1', 'mean_2']

    def _split_array_text(text):
        # Drop the square brackets and split on whitespace; the resulting
        # array holds strings, which are converted to float further down.
        bare = text.replace('[', '').replace(']', '').strip()
        return np.array(bare.split())

    df = load_data(experiment_name, data_path)
    df['posterior_mean'] = df.posterior_mean.apply(_split_array_text)

    # Action means: one float column per posterior-mean entry.
    for position, column in enumerate(mean_columns):
        df[column] = df['posterior_mean'].apply(
            lambda entries: float(entries[position]))

    aggregations = {'instant_regret': np.mean}
    for column in mean_columns:
        aggregations[column] = np.mean
    plt_df = df.groupby(['agent', 't']).agg(aggregations).reset_index()

    regret_plot = (
        gg.ggplot(plt_df)
        + gg.aes('t', 'instant_regret', colour='agent')
        + gg.geom_line(size=1.25, alpha=0.75)
        + gg.xlab('Timestep (t)')
        + gg.ylab('Average instantaneous regret')
        + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1')
        + gg.coord_cartesian(ylim=(0, 0.02)))

    # Long format: one row per (agent, t, mean column). group_id keeps a
    # separate line for every agent/column combination.
    melt_df = pd.melt(plt_df, id_vars=['agent', 't'], value_vars=mean_columns)
    melt_df['group_id'] = melt_df.agent + melt_df.variable

    action_plot = (
        gg.ggplot(melt_df)
        + gg.aes('t', 'value', colour='agent', group='group_id')
        + gg.geom_line(size=1.25, alpha=0.75)
        + gg.coord_cartesian(ylim=(0, 0.05))
        + gg.xlab('Timestep (t)')
        + gg.ylab('Expected mean reward')
        + gg.scale_colour_brewer(name='Agent', type='qual', palette='Set1'))

    return {
        'misspecified_regret': regret_plot,
        'misspecified_action': action_plot,
    }
def plot_action_proportion(df_agent):
    """Plot the action proportion for the sub-dataframe for a single agent.

    For every action index from 0 up to the largest value in
    `df_agent.action`, the fraction of rows at each timestep `t` that chose
    that action is drawn as one line.

    Args:
      df_agent: DataFrame with at least the columns 't' and 'action'.

    Returns:
      ggplot plot with one line per action.
    """
    action_count = np.max(df_agent.action) + 1

    def _share_of(action):
        # Fraction of rows within each timestep whose action equals `action`.
        share = df_agent.groupby('t').agg(
            {'action': lambda chosen: np.mean(chosen == action)})
        return share.rename(columns={'action': 'action_' + str(action)})

    per_action = [_share_of(action) for action in range(action_count)]
    wide = pd.concat(per_action, axis=1).reset_index()
    long_form = pd.melt(wide, id_vars='t')

    return (gg.ggplot(long_form)
            + gg.aes('t', 'value', colour='variable', group='variable')
            + gg.geom_line(size=1.25, alpha=0.75)
            + gg.xlab('Timestep (t)')
            + gg.ylab('Action probability')
            + gg.ylim(0, 1)
            + gg.scale_colour_brewer(name='Variable', type='qual',
                                     palette='Set1'))
def plot_qq(df, color_var, facet_var=None, title=''):
    """Build a QQ plot of observed against expected -log10 p-values.

    Inspired by
    https://www.cureffi.org/2012/08/15/qq-plots-with-matplotlib/

    Parameters
    ----------
    df : pandas.DataFrame
        DataFrame with a 'pval_neglog10' column plus the grouping columns.
    color_var : string
        Column used to group the data and to color the points.
    facet_var : string
        Optional second grouping column; when given the plot is faceted by
        it.
    title : string
        Plot title.

    Returns
    -------
    ggplot plot
    """
    # Retrieve the most significant p-value (the largest -log10 value); it
    # defines the upper limit of both axes.
    axis_max = max(df['pval_neglog10'])

    # calculate_expected_pval runs once per group; the group keys are then
    # dropped from the index again.
    group_keys = color_var if facet_var is None else [color_var, facet_var]
    pvals = (df.groupby(by=group_keys)
             .apply(calculate_expected_pval)
             .reset_index(level=group_keys, drop=True))

    n_colors = pvals[color_var].nunique()

    # Components are collected in the order they are added to the plot.
    components = [
        plt9.geom_point(size=0.1, alpha=0.25),
        plt9.geom_abline(slope=1, intercept=0, color='black',
                         linetype='dashed'),
        plt9.theme_bw(),
    ]
    if n_colors < 9:
        components.append(
            plt9.scale_colour_brewer(palette='Dark2', type='qual'))
    components.append(plt9.labs(x='Expected (-log10 p-value)',
                                y='Observed (-log10 p-value)',
                                title=title,
                                color=''))
    components.append(plt9.lims(x=(0, axis_max), y=(0, axis_max)))
    if facet_var is not None:
        components.append(plt9.facet_wrap('~ {}'.format(facet_var), ncol=5))
        components.append(
            plt9.theme(strip_text=plt9.element_text(size=5),
                       axis_text_x=plt9.element_text(angle=-45, hjust=0)))
    # Legend keys are drawn larger and fully opaque so they stay readable.
    components.append(plt9.guides(color=plt9.guide_legend(
        override_aes={'size': 2.0, 'alpha': 1.0})))

    qqplot = plt9.ggplot(
        pvals,
        plt9.aes(x='expected_pval_neglog10', y='pval_neglog10',
                 color=color_var))
    for component in components:
        qqplot = qqplot + component
    return qqplot
def simple_algorithm_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Simple plot of average instantaneous regret by agent, per timestep.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name: ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # Mean instantaneous regret for every (t, agent) pair.
    mean_regret = (results.groupby(['t', 'agent'])
                   .agg({'instant_regret': np.mean})
                   .reset_index())

    colour_scale = gg.scale_colour_brewer(
        name='Agent', type='qual', palette='Set1')
    plot = (gg.ggplot(mean_regret)
            + gg.aes('t', 'instant_regret', colour='agent')
            + gg.geom_line(size=1.25, alpha=0.75)
            + gg.xlab('Timestep (t)')
            + gg.ylab('Average instantaneous regret')
            + colour_scale)

    plot_dict = {}
    plot_dict[experiment_name] = plot
    return plot_dict
def simple_algorithm_plot(experiment_name, data_path=_DEFAULT_DATA_PATH):
    """Simple plot of average instantaneous regret by agent, per timestep.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name + '_simple': ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # Mean instantaneous regret for every (t, agent) pair.
    regret_by_step = results.groupby(['t', 'agent']).agg(
        {'instant_regret': np.mean})
    regret_by_step = regret_by_step.reset_index()

    plot = gg.ggplot(regret_by_step)
    plot = plot + gg.aes('t', 'instant_regret', colour='agent')
    plot = plot + gg.geom_line(size=1.25, alpha=0.75)
    plot = plot + gg.xlab('time period (t)')
    plot = plot + gg.ylab('per-period regret')
    plot = plot + gg.scale_colour_brewer(
        name='agent', type='qual', palette='Set1')

    return {experiment_name + '_simple': plot}
def cumulative_travel_time_plot(experiment_name,
                                data_path=_DEFAULT_DATA_PATH):
    """Plot cumulative ratio total travel time relative to optimal shortest path.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      plot_dict: {experiment_name + '_cum': ggplot plot}
    """
    results = load_data(experiment_name, data_path)

    # Ratio plotted on the y axis: (cum_optimal - cum_regret) / cum_optimal.
    shortfall = results.cum_optimal - results.cum_regret
    results['cum_ratio'] = shortfall / results.cum_optimal

    mean_ratio = (results.groupby(['t', 'agent'])
                  .agg({'cum_ratio': np.mean})
                  .reset_index())

    # Dashed reference line at a ratio of exactly 1.
    reference_line = gg.geom_hline(
        yintercept=1, linetype='dashed', size=2, alpha=0.5)

    plot = (gg.ggplot(mean_ratio)
            + gg.aes('t', 'cum_ratio', colour='agent')
            + gg.geom_line(size=1.25, alpha=0.75)
            + gg.xlab('Timestep (t)')
            + gg.ylab('Total distance / optimal')
            + gg.scale_colour_brewer(name='Agent', type='qual',
                                     palette='Set1')
            + gg.aes(ymin=1)
            + reference_line)

    return {experiment_name + '_cum': plot}
def main():
    """Run CLI.

    Loads an H5 AnnData file, optionally keeps only the cells flagged in
    `cell_passes_qc`, sums the expression of the chrX and chrY signature
    genes in every cell, averages those sums per (experiment_id, sex) and
    saves the resulting scatterplot to `<output_file>.png`.
    """
    parser = argparse.ArgumentParser(description="""
        Read anndata object. Read chrX and chrY genes, plot scatterplot of
        mean expression of those signature across samples. Anndata should
        have experiment_id and sex columns.
        """)
    parser.add_argument('-h5', '--h5_anndata',
                        dest='h5',
                        required=True,
                        help='H5 AnnData file.')
    parser.add_argument(
        '-Y', '--chrY_genes',
        default='',
        dest='Y',
        required=False,
        help='TSV file of Y genes. If none, uses all genes on Y chr.')
    parser.add_argument('-X', '--chrX_genes',
                        default='',
                        dest='X',
                        required=False,
                        help='TSV file of X genes. If none, uses XIST.')
    parser.add_argument('-o', '--output_file',
                        default='scatterplot-sex_sample_swap_check',
                        dest='o',
                        help='Basename for output files.')
    options = parser.parse_args()

    # Load the AnnData file
    adata = sc.read_h5ad(filename=options.h5)

    # If we have a flag for cells that pass QC then filter down to them
    if 'cell_passes_qc' in adata.obs:
        adata = adata[adata.obs['cell_passes_qc'], :]
        del adata.obs['cell_passes_qc']

    # Read Chr X and Chr Y genes
    if options.X != '':
        x_genes = pd.read_csv(options.X, sep="\t")['ensembl_gene_id']
        x_lab = "Mean X chr gene expression (counts)"
    else:
        x_genes = ['ENSG00000229807']  # XIST
        x_lab = "Mean XIST gene expression (counts)"

    if options.Y != '':
        y_genes = pd.read_csv(options.Y, sep="\t")['ensembl_gene_id']
    else:
        y_genes = [
            "ENSG00000184895", "ENSG00000129824", "ENSG00000067646",
            "ENSG00000176679", "ENSG00000099715", "ENSG00000168757",
            "ENSG00000099721", "ENSG00000092377", "ENSG00000099725",
            "ENSG00000233803", "ENSG00000229549", "ENSG00000228927",
            "ENSG00000258992", "ENSG00000238074", "ENSG00000236424",
            "ENSG00000114374", "ENSG00000067048", "ENSG00000183878",
            "ENSG00000154620", "ENSG00000129864", "ENSG00000129862",
            "ENSG00000165246", "ENSG00000129873", "ENSG00000182415",
            "ENSG00000172468", "ENSG00000169953", "ENSG00000286265",
            "ENSG00000012817", "ENSG00000198692", "ENSG00000280969",
            "ENSG00000242875", "ENSG00000234414", "ENSG00000244395",
            "ENSG00000242389", "ENSG00000169807", "ENSG00000169800",
            "ENSG00000226941", "ENSG00000169789", "ENSG00000183753",
            "ENSG00000188120", "ENSG00000205944", "ENSG00000169763",
            "ENSG00000172352", "ENSG00000183795", "ENSG00000187191",
            "ENSG00000205916", "ENSG00000185894", "ENSG00000172288",
        ]
        # Same as above, but hugo names (same order):
        #   SRY, RPS4Y1, ZFY, TGIF2LY, PCDH11Y, TSPY2, AMELY, TBL1Y, PRKY,
        #   TSPY4, TSPY8, TSPY3, TSPY1, TSPY9P, TSPY10, USP9Y, DDX3Y, UTY,
        #   TMSB4Y, VCY, VCY1B, NLGN4Y, CDY2B, CDY2A, HSFY1, HSFY2,
        #   AC007244.1, KDM5D, EIF1AY, RPS4Y2, RBMY1B, RBMY1A1, RBMY1D,
        #   RBMY1E, PRY2, RBMY1F, RBMY1J, PRY, BPY2, DAZ1, DAZ2, PRYP3,
        #   CDY1B, BPY2B, DAZ3, DAZ4, BPY2C, CDY1

    def _per_cell_sum(flag_column):
        # Row sums are taken directly on the expression matrix so that both
        # sparse and dense `X` work and nothing is densified first; the
        # result is flattened to one value per cell.
        counts = adata[:, adata.var[flag_column]].X
        return np.asarray(counts.sum(axis=1)).ravel()

    # Make the plot
    # np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
    adata.var['X_chr-gene'] = np.isin(adata.var.index, x_genes)
    adata.var['Y_chr-gene'] = np.isin(adata.var.index, y_genes)
    adata.obs['X_chr-sum'] = _per_cell_sum('X_chr-gene')
    adata.obs['Y_chr-sum'] = _per_cell_sum('Y_chr-gene')

    if 'sex' not in adata.obs.columns:
        adata.obs['sex'] = 'not reported'

    df = adata.obs[['experiment_id', 'sex', 'Y_chr-sum', 'X_chr-sum']]
    # Rows whose mean is NaN are dropped before plotting.
    df = df.groupby(['experiment_id', 'sex']).mean().dropna().reset_index()

    # Save scatterplot with mean expression per sample
    gplt = plt9.ggplot(df) + plt9.aes(
        x='X_chr-sum', y='Y_chr-sum', color='sex')
    gplt = gplt + plt9.theme_bw()
    gplt = gplt + plt9.scale_colour_brewer(type='qual', palette='Dark2')
    gplt = gplt + plt9.geom_point(alpha=0.45)
    gplt = gplt + plt9.ylab("Mean Y chr gene expression (counts)")
    gplt = gplt + plt9.xlab(x_lab)
    gplt.save('{}.png'.format(options.o), dpi=300, width=4, height=4)
def concurrent_agents_plot(experiment_name='graph_indep_concurrent',
                           data_path=_DEFAULT_DATA_PATH,
                           paper_version=True):
    """Plot mean instantaneous regret per action and per time period.

    Passing paper_version=True should be used to reproduce Fig. 14 of the
    paper for K = 1,10,20,50,100. In this case, the labels in the legend are
    manually ordered by the values of K. Otherwise, the labels are ordered
    alphabetically.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.
      paper_version: bool = whether to use the manually ordered K legend.

    Returns:
      plot_dict: {'per_action_plot': ggplot plot,
                  'per_period_plot': ggplot plot}
    """
    # The experiment name comes first and the data path second, matching
    # every other load_data call in this file.
    df = load_data(experiment_name, data_path)

    mean_regret = {'instant_regret': np.mean}
    plt_df_per_action = (
        df.groupby(['agent', 't', 'agent_id', 'action_id'])
        .agg(mean_regret)
        .reset_index())
    plt_df_per_period = (
        df.groupby(['agent', 't']).agg(mean_regret).reset_index())

    if paper_version:
        # Recompute agent_id from the agent name with get_agent_id; the
        # manual colour scale below maps those ids to the K labels.
        plt_df_per_action['agent_id'] = plt_df_per_action.agent.apply(
            get_agent_id)
        plt_df_per_period['agent_id'] = plt_df_per_period.agent.apply(
            get_agent_id)
        colour_column = 'agent_id'
        per_action_ylab = 'per-action regret'
        custom_labels = ['K = 1', 'K = 10', 'K = 20', 'K = 50', 'K = 100']
        custom_colors = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3",
                         "#FF7F00"]

        def _colour_scale():
            return gg.scale_color_manual(
                name='agent', labels=custom_labels, values=custom_colors)
    else:
        colour_column = 'agent'
        # NOTE(review): this branch labels the per-action plot
        # 'per-period regret' while the paper branch uses
        # 'per-action regret'; kept as is, confirm which is intended.
        per_action_ylab = 'per-period regret'

        def _colour_scale():
            return gg.scale_colour_brewer(
                name='agent', type='qual', palette='Set1')

    # The x range of the per-action plot is 2.5 times the number of
    # distinct time periods.
    n_periods = len(plt_df_per_period.groupby('t'))

    p_per_action = (
        gg.ggplot(plt_df_per_action)
        + gg.aes('action_id', 'instant_regret', colour=colour_column)
        + gg.geom_line()
        + gg.geom_line(size=1.25, alpha=0.75)
        + gg.xlim(0, 2.5 * n_periods)
        + _colour_scale()
        + gg.labels.xlab('number of actions')
        + gg.labels.ylab(per_action_ylab))

    p_per_period = (
        gg.ggplot(plt_df_per_period)
        + gg.aes('t', 'instant_regret', colour=colour_column)
        + gg.geom_line()
        + gg.geom_line(size=1.25, alpha=0.75)
        + _colour_scale()
        + gg.labels.xlab('time period (t)')
        + gg.labels.ylab('per-period regret'))

    return {
        'per_action_plot': p_per_action,
        'per_period_plot': p_per_period,
    }
def plot_ecdf(df_plot,
              variable_column,
              color_column='none',
              output_file='plot_distribution',
              facet_column='none',
              x_log10=False):
    """Plot the empirical cumulative distribution of a column to png.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        DataFrame with <variable_column> as a column.
    variable_column : string
        String of variable_column column to plot.
    color_column : string
        String of color column to plot; 'none' disables coloring.
    output_file : string
        Basename of output file.
    facet_column : string
        Column to facet the plot by; 'none' disables faceting.
    x_log10 : bool
        If True, the x axis uses a log10 transform.

    Returns
    -------
    int
        Always 0.
    """
    if color_column == 'none':
        n_colors = 0
        mapping = plt9.aes(x=variable_column)
    else:
        n_colors = df_plot[color_column].nunique()
        mapping = plt9.aes(x=variable_column, color=color_column)

    # Only the x transform differs between the linear and log10 variants.
    x_scale_args = {'minor_breaks': 0}
    if x_log10:
        x_scale_args['trans'] = 'log10'

    gplt = (plt9.ggplot(df_plot, mapping)
            + plt9.theme_bw()
            + plt9.stat_ecdf(alpha=0.8)
            + plt9.scale_x_continuous(**x_scale_args)
            + plt9.scale_y_continuous(minor_breaks=0)
            + plt9.labs(y='Cumulative density', title=''))

    if n_colors > 20:
        # Too many groups: drop the legend.
        gplt = gplt + plt9.theme(legend_position='none')
    elif 0 < n_colors < 9:
        gplt = gplt + plt9.scale_colour_brewer(palette='Dark2', type='qual')

    png_path = '{}.png'.format(output_file)
    if facet_column == 'none':
        gplt.save(png_path, dpi=300, width=4, height=4)
        return 0

    gplt = gplt + plt9.facet_wrap('~ {}'.format(facet_column), ncol=5)
    # The figure size grows with the number of facets.
    n_facets = df_plot[facet_column].nunique()
    gplt.save(png_path,
              dpi=300,
              width=6 * (n_facets / 4),
              height=4 * (n_facets / 4),
              limitsize=False)
    return 0
def plot_umi_mt_density(df_plot,
                        output_file='plot_umi_mt_density',
                        facet_column='none',
                        color_var='density',
                        density_contour=False):
    """Plot total counts against percent mitochondrial counts to png.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        DataFrame with the following keys 'total_counts',
        'pct_counts_gene_group__mito_transcript'. When color_var is
        'density' a '1251234_density' column is added to it.
    output_file : string
        Basename of output file.
    facet_column : string
        Column to facet the output by; 'none' disables faceting.
    color_var : string
        Column to color the points by, or 'density' to color by the value
        returned from calculate_density.
    density_contour : bool
        If True, a 2d density contour layer is drawn over the points.

    Returns
    -------
    NULL
    """
    mito_column = 'pct_counts_gene_group__mito_transcript'
    # Unusual column name so the computed density does not collide with an
    # existing column.
    density_column = "1251234_density"

    if color_var == 'density':
        color_title = 'Density\n'
        color_var = density_column
        df_plot[color_var] = calculate_density(df_plot, facet_column)
    else:
        # Any other column is titled with its own name unless listed here.
        legend_titles = {
            mito_column: '% MT\n',
            'cell_passes_qc': 'Cell passed QC\n',
        }
        color_title = legend_titles.get(color_var, color_var)

    gplt = (plt9.ggplot(df_plot,
                        plt9.aes(x='total_counts',
                                 y=mito_column,
                                 color=color_var))
            + plt9.theme_bw()
            + plt9.geom_point(alpha=0.5, size=0.8)
            + plt9.scale_x_continuous(trans='log10',
                                      labels=comma_labels,
                                      minor_breaks=0))

    # Pick the colour scale that fits the coloring variable.
    if color_var == mito_column:
        gplt = gplt + plt9.scale_color_gradient2(low='#3B9AB2',
                                                 mid='#EBCC2A',
                                                 high='#F21A00',
                                                 midpoint=50,
                                                 limits=[0, 100])
        gplt = gplt + plt9.guides(color=plt9.guide_colorbar(ticks=False))
    elif color_var == 'cell_passes_qc':
        gplt = gplt + plt9.scale_colour_brewer(type='qual', palette='Dark2')
    elif color_var == density_column:
        gplt = gplt + plt9.scale_color_cmap(cmap_name='viridis')

    if density_contour:
        gplt = gplt + plt9.geom_density_2d(alpha=0.5)

    gplt = gplt + plt9.labs(x='Number of molecules',
                            y='Percent of molecules from MT genes',
                            title='',
                            color=color_title)

    png_path = '{}.png'.format(output_file)
    if facet_column == 'none':
        gplt.save(png_path, dpi=300, width=4, height=4)
        return

    gplt = gplt + plt9.facet_wrap('~ {}'.format(facet_column), ncol=5)
    # The figure size grows with the number of facets.
    n_samples = df_plot[facet_column].nunique()
    gplt.save(png_path,
              dpi=300,
              width=4 * (n_samples / 2),
              height=4 * (n_samples / 4),
              limitsize=False)