def scatter_plot(
        config_file,
        plot_file,
        burnin=0,
        max_clusters=None,
        mesh_size=101,
        min_cluster_size=0,
        samples=None,
        thin=1):
    """Plot per-cluster summary values for every pair of samples.

    The cluster summary table is loaded through
    ``post_process.clusters.load_summary_table`` and reshaped into two
    cluster-by-sample tables (one from the ``mean`` column, one from the
    ``std`` column) which are handed to ``_scatter.plot_all_pairs``.

    Args:
        config_file: Passed to the summary table loader.
        plot_file: Passed to ``_scatter.plot_all_pairs`` as the output target.
        burnin: Forwarded to the loader.
        max_clusters: Forwarded to the loader.
        mesh_size: Forwarded to the loader.
        min_cluster_size: Forwarded to the loader as ``min_size``.
        samples: Sample ids to plot. When ``None``, the sorted unique values
            of the table's ``sample_id`` column are used.
        thin: Forwarded to the loader.
    """
    utils.setup_plot()

    summary = post_process.clusters.load_summary_table(
        config_file,
        burnin=burnin,
        max_clusters=max_clusters,
        mesh_size=mesh_size,
        min_size=min_cluster_size,
        thin=thin,
    )

    # Both tables share the same layout: one row per cluster, one column per sample.
    layout = {'index': 'cluster_id', 'columns': 'sample_id'}
    mean_df = summary.pivot(values='mean', **layout)
    error_df = summary.pivot(values='std', **layout)

    if samples is None:
        samples = sorted(summary['sample_id'].unique())

    cluster_ids = pd.Series(summary['cluster_id'])
    color_map = utils.get_clusters_color_map(cluster_ids)

    _scatter.plot_all_pairs(
        color_map,
        mean_df,
        plot_file,
        samples,
        error_df=error_df,
    )
def scatter_plot(
        config_file,
        plot_file,
        burnin=0,
        max_clusters=None,
        min_cluster_size=0,
        samples=None,
        thin=1,
        value='cellular_prevalence'):
    """Plot a per-mutation value for every pair of samples, coloured by cluster.

    The loci table is loaded through ``post_process.loci.load_table``. Each
    mutation is coloured with the colour assigned to its cluster, while the
    cluster-level colour map is passed separately for the legend.

    Args:
        config_file: Passed to the loci table loader.
        plot_file: Passed to ``_scatter.plot_all_pairs`` as the output target.
        burnin: Forwarded to the loader (positionally).
        max_clusters: Forwarded to the loader.
        min_cluster_size: Forwarded to the loader.
        samples: Sample ids to plot. When ``None``, the sorted unique values
            of the table's ``sample_id`` column are used.
        thin: Forwarded to the loader (positionally).
        value: Name of the table column that is pivoted and plotted.
    """
    utils.setup_plot()

    loci_df = post_process.loci.load_table(
        config_file,
        burnin,
        thin,
        max_clusters=max_clusters,
        min_cluster_size=min_cluster_size,
    )

    if samples is None:
        samples = sorted(loci_df['sample_id'].unique())

    cluster_colors = utils.get_clusters_color_map(loci_df['cluster_id'])

    # One row per (mutation, cluster) pair, then keyed by mutation so that each
    # mutation can be translated into the colour of its cluster.
    assignments = loci_df[['mutation_id', 'cluster_id']].drop_duplicates()
    assignments = assignments.set_index('mutation_id')
    loci_color_map = assignments['cluster_id'].map(cluster_colors).to_dict()

    mean_df = loci_df.pivot(index='mutation_id', columns='sample_id', values=value)

    _scatter.plot_all_pairs(
        loci_color_map,
        mean_df,
        plot_file,
        samples,
        legend_color_map=cluster_colors,
    )
def density_plot(
        config_file,
        plot_file,
        burnin=0,
        max_clusters=None,
        mesh_size=101,
        min_cluster_size=0,
        samples=None,
        thin=1):
    """Draw one row of violin-style density shapes per sample, one shape per cluster.

    The table returned by ``post_process.clusters.load_table`` is split by
    sample. For every cluster row, the remaining columns are exponentiated,
    scaled so the widest point is 0.3, and drawn as a symmetric filled shape
    centred on that cluster's x position.

    Args:
        config_file: Passed to the cluster table loader.
        plot_file: Passed to ``utils.save_figure`` as the output target.
        burnin: Forwarded to the loader.
        max_clusters: Forwarded to the loader.
        mesh_size: Forwarded to the loader.
        min_cluster_size: Forwarded to the loader as ``min_size``.
        samples: Sample ids to plot, in display order. When ``None``, the
            sorted unique values of the ``sample_id`` column are used;
            otherwise the table is restricted to these samples.
        thin: Forwarded to the loader.
    """
    df = post_process.clusters.load_table(
        config_file,
        burnin=burnin,
        thin=thin,
        max_clusters=max_clusters,
        mesh_size=mesh_size,
        min_size=min_cluster_size,
    )

    # cluster_id -> size, used only for the tick labels on the bottom axes.
    sizes = df[['cluster_id', 'size']].drop_duplicates().set_index('cluster_id').to_dict()['size']

    if samples is None:
        samples = sorted(df['sample_id'].unique())
    else:
        df = df[df['sample_id'].isin(samples)]

    num_samples = len(samples)

    clusters = df['cluster_id'].unique()

    tick_positions = list(range(1, len(clusters) + 1))

    # Look positions up by cluster id rather than by row order, so a shape can
    # never be drawn above another cluster's tick label if a sample's rows are
    # ordered differently from `clusters`.
    position_of = dict(zip(clusters, tick_positions))

    utils.setup_plot()

    width = 8
    height = 2 * num_samples + 1

    fig = pp.figure(figsize=(width, height))

    grid = gs.GridSpec(nrows=num_samples, ncols=1)

    colors = utils.get_clusters_color_map(pd.Series(clusters))

    for ax_index, sample_id in enumerate(samples):
        plot_df = df[df['sample_id'] == sample_id]
        plot_df = plot_df.drop(['sample_id', 'size'], axis=1).set_index('cluster_id')

        ax = fig.add_subplot(grid[ax_index])

        utils.setup_axes(ax)

        ax.annotate(
            sample_id,
            xy=(1.01, 0.5),
            xycoords='axes fraction',
            fontsize=defaults.axis_label_font_size,
        )

        for cluster_id, log_pdf in plot_df.iterrows():
            pos = position_of[cluster_id]

            # The remaining column labels are parsed as floats and serve as the
            # y coordinates; the row values are exponentiated before plotting
            # (presumably log densities on a mesh -- confirm against the loader).
            y = log_pdf.index.astype(float)
            half_width = np.exp(log_pdf)
            half_width = (half_width / half_width.max()) * 0.3

            ax.fill_betweenx(
                y,
                pos - half_width,
                pos + half_width,
                color=colors[cluster_id],
                where=(half_width > 1e-6),
            )

        ax.set_xticks(tick_positions)

        # Only the bottom axes carries cluster labels.
        if ax_index == (num_samples - 1):
            x_tick_labels = ['{0} (n={1})'.format(c, sizes[c]) for c in clusters]

            ax.set_xticklabels(x_tick_labels, rotation=90)
            ax.set_xlabel(defaults.cluster_label, fontsize=defaults.axis_label_font_size)
        else:
            ax.set_xticklabels([])

        utils.set_tick_label_font_sizes(ax, defaults.tick_label_font_size)

        ax.set_ylim(defaults.cellular_prevalence_limits)

    # A single axes gets its own y label; several axes share one figure-level label.
    if num_samples == 1:
        ax.set_ylabel(defaults.cellular_prevalence_label, fontsize=defaults.axis_label_font_size)
    else:
        fig.text(
            -0.01,
            0.5,
            defaults.cellular_prevalence_label,
            fontsize=defaults.axis_label_font_size,
            ha='center',
            rotation=90,
            va='center',
        )

    grid.tight_layout(fig)

    utils.save_figure(fig, plot_file)
def parallel_coordinates_plot(
        config_file,
        plot_file,
        burnin=0,
        max_clusters=None,
        mesh_size=101,
        min_cluster_size=0,
        samples=None,
        thin=1):
    """Plot each cluster's ``mean`` (with ``std`` error bars) across samples.

    The cluster summary table is loaded through
    ``post_process.clusters.load_summary_table``. Samples are placed on the
    x axis in the order given by ``samples`` and one error-bar line is drawn
    per cluster.

    Args:
        config_file: Passed to the summary table loader.
        plot_file: Passed to ``utils.save_figure`` as the output target.
        burnin: Forwarded to the loader.
        max_clusters: Forwarded to the loader.
        mesh_size: Forwarded to the loader.
        min_cluster_size: Forwarded to the loader as ``min_size``.
        samples: Sample ids to plot, in x-axis order. When ``None``, the
            sorted unique values of the ``sample_id`` column are used;
            otherwise the table is restricted to these samples.
        thin: Forwarded to the loader.
    """
    utils.setup_plot()

    plot_df = post_process.clusters.load_summary_table(
        config_file,
        burnin=burnin,
        max_clusters=max_clusters,
        mesh_size=mesh_size,
        min_size=min_cluster_size,
        thin=thin,
    )

    if samples is None:
        samples = sorted(plot_df['sample_id'].unique())
    else:
        # Copy the filtered rows: a column is added below, and assigning onto a
        # boolean-indexed slice triggers pandas' SettingWithCopyWarning.
        plot_df = plot_df[plot_df['sample_id'].isin(samples)].copy()

    clusters = sorted(plot_df['cluster_id'].unique())

    # x coordinate of each row is the position of its sample in `samples`.
    plot_df['sample_index'] = plot_df['sample_id'].apply(lambda x: samples.index(x))

    plot_df = plot_df.sort_values(by='sample_index')

    grid = sb.FacetGrid(
        plot_df,
        hue='cluster_id',
        hue_order=clusters,
        palette='husl',
    )

    grid.map(
        pp.errorbar,
        'sample_index',
        'mean',
        'std',
        marker=defaults.line_plot_marker,
        markersize=defaults.line_plot_marker_size,
    )

    ax = grid.ax

    utils.setup_axes(ax)

    fig = grid.fig

    # Legend: shrink the axes to 80% width and put the legend to its right.
    box = ax.get_position()

    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Cluster')

    # Axis formatting
    ax.set_xticks(sorted(plot_df['sample_index'].unique()))
    ax.set_xticklabels(samples)

    ax.set_xlabel(defaults.sample_label, fontsize=defaults.axis_label_font_size)
    ax.set_ylabel(defaults.cellular_prevalence_label, fontsize=defaults.axis_label_font_size)

    utils.set_tick_label_font_sizes(ax, defaults.tick_label_font_size)

    # Plot limits
    ax.set_xlim(
        plot_df['sample_index'].min() - 0.1,
        plot_df['sample_index'].max() + 0.1,
    )
    ax.set_ylim(*defaults.cellular_prevalence_limits)

    # Resize and save figure
    fig.set_size_inches(*utils.get_parallel_coordinates_figure_size(samples))

    utils.save_figure(fig, plot_file)
def density_plot(config_file, plot_file, burnin=0, samples=None, thin=1):
    """Draw one violin-plot row per mutation, with one violin per sample.

    The table returned by ``_load_density_df`` is split by ``mutation_id``;
    each subset is drawn with ``seaborn.violinplot`` using ``sample_id`` on
    the x axis and ``cellular_prevalence`` on the y axis.

    Args:
        config_file: Passed to ``_load_density_df``.
        plot_file: Passed to ``utils.save_figure`` as the output target.
        burnin: Passed to ``_load_density_df``.
        samples: Sample ids to plot, in x-axis order. When ``None``, the
            sorted unique values of the ``sample_id`` column are used;
            otherwise the table is restricted to these samples.
        thin: Passed to ``_load_density_df``.
    """
    utils.setup_plot()

    density_df = _load_density_df(config_file, burnin, thin)

    if samples is not None:
        density_df = density_df[density_df['sample_id'].isin(samples)]
    else:
        samples = sorted(density_df['sample_id'].unique())

    mutation_ids = density_df['mutation_id'].unique()
    num_rows = len(mutation_ids)
    last_row = num_rows - 1

    fig = pp.figure(figsize=(8, 2 * num_rows + 2))
    grid = gs.GridSpec(nrows=num_rows, ncols=1)

    for row, mutation_id in enumerate(mutation_ids):
        ax = fig.add_subplot(grid[row])

        utils.setup_axes(ax)

        mutation_df = density_df[density_df['mutation_id'] == mutation_id]

        sb.violinplot(
            ax=ax,
            data=mutation_df,
            x='sample_id',
            y='cellular_prevalence',
            inner=None,
            order=samples,
            scale='width',
        )

        ax.set_ylabel('')

        # Sample names are shown only underneath the bottom row.
        if row == last_row:
            ax.set_xlabel(defaults.sample_label)
        else:
            ax.set_xticklabels([])
            ax.set_xlabel('')

        ax.set_ylim(*defaults.cellular_prevalence_limits)

        # Label each row with its mutation id, just right of the axes.
        ax.annotate(
            mutation_id,
            xy=(1.01, 0.5),
            xycoords='axes fraction',
            fontsize=defaults.axis_label_font_size,
        )

        utils.set_tick_label_font_sizes(ax, defaults.tick_label_font_size)

    # One shared y label for the whole figure, left of the axes.
    fig.text(
        -0.01,
        0.5,
        defaults.cellular_prevalence_label,
        fontsize=defaults.axis_label_font_size,
        ha='center',
        rotation=90,
        va='center',
    )

    grid.tight_layout(fig, h_pad=3)

    utils.save_figure(fig, plot_file)
def parallel_coordinates_plot(
        config_file,
        plot_file,
        burnin=0,
        max_clusters=None,
        min_cluster_size=0,
        samples=None,
        thin=1,
        value='cellular_prevalence'):
    """Plot one line per mutation across samples, coloured by cluster.

    The loci table is loaded through ``post_process.loci.load_table``.
    Samples are placed on the x axis in the order given by ``samples`` and
    the column named by ``value`` is drawn on the y axis.

    Args:
        config_file: Passed to the loci table loader.
        plot_file: Passed to ``utils.save_figure`` as the output target.
        burnin: Forwarded to the loader (positionally).
        max_clusters: Forwarded to the loader.
        min_cluster_size: Forwarded to the loader.
        samples: Sample ids to plot, in x-axis order. When ``None``, the
            sorted unique values of the ``sample_id`` column are used;
            otherwise the table is restricted to these samples.
        thin: Forwarded to the loader (positionally).
        value: Name of the table column to plot. A y label is set only for
            ``'cellular_prevalence'`` and ``'variant_allele_frequency'``.
    """
    utils.setup_plot()

    df = post_process.loci.load_table(
        config_file,
        burnin,
        thin,
        max_clusters=max_clusters,
        min_cluster_size=min_cluster_size,
    )

    # Built before any sample filtering, so the legend lists every cluster in
    # the loaded table.
    color_map = utils.get_clusters_color_map(df['cluster_id'])

    if samples is None:
        samples = sorted(df['sample_id'].unique())
    else:
        # Copy the filtered rows: a column is added below, and assigning onto a
        # boolean-indexed slice triggers pandas' SettingWithCopyWarning.
        df = df[df['sample_id'].isin(samples)].copy()

    # x coordinate of each row is the position of its sample in `samples`.
    df['sample_index'] = df['sample_id'].apply(lambda x: samples.index(x))

    df = df.sort_values(by='sample_index')

    fig = pp.figure()

    ax = fig.add_subplot(1, 1, 1)

    utils.setup_axes(ax)

    for cluster_id, cluster_df in df.groupby('cluster_id'):
        for _, locus_df in cluster_df.groupby('mutation_id'):
            x = locus_df['sample_index']
            y = locus_df[value]

            ax.plot(
                x,
                y,
                alpha=0.75,
                c=color_map[cluster_id],
                marker=defaults.line_plot_marker,
                markersize=defaults.line_plot_marker_size,
            )

    ax.set_xlabel(defaults.sample_label, fontsize=defaults.axis_label_font_size)

    # Both labels use the same font size; the original omitted it for the
    # variant allele frequency label.
    if value == 'cellular_prevalence':
        ax.set_ylabel(defaults.cellular_prevalence_label, fontsize=defaults.axis_label_font_size)
    elif value == 'variant_allele_frequency':
        ax.set_ylabel(defaults.variant_allele_frequency_label, fontsize=defaults.axis_label_font_size)

    ax.set_xticks(sorted(df['sample_index'].unique()))
    ax.set_xticklabels(samples)

    utils.set_tick_label_font_sizes(ax, defaults.tick_label_font_size)

    ax.set_ylim(*defaults.cellular_prevalence_limits)

    # Shrink the axes to 80% width and put the legend to its right.
    box = ax.get_position()

    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    legend_handles = utils.get_legend_handles(color_map)

    legend = ax.legend(
        legend_handles.values(),
        legend_handles.keys(),
        bbox_to_anchor=(1, 0.5),
        fontsize=defaults.legend_font_size,
        loc='center left',
        title=defaults.cluster_label,
    )

    legend.get_title().set_fontsize(defaults.legend_title_font_size)

    utils.save_figure(fig, plot_file)