def visualize_embedding(multinet, labels=None, verbose=True):
    """Project an embedding to 2D with t-SNE and show it as a scatter plot.

    Args:
        multinet: Object exposing an ``embedding`` attribute; element 0 is
            the matrix handed to t-SNE and element 1 is the sequence of
            indices used to look up labels.
        labels: Optional lookup indexable by the embedding indices (dict,
            list, NumPy array, pandas Series, ...). When given and
            non-empty, points are coloured by label.
        verbose: When true, print a banner before the visualization starts.

    Returns:
        None. The plot is drawn and displayed with ``plt.show()``.
    """
    embedding = multinet.embedding
    X = embedding[0]
    indices = embedding[1]

    if verbose:
        print("------ Starting embedding visualization -------")

    # Explicit None/length test: plain truthiness raises for NumPy arrays and
    # pandas Series, while an empty container still means "no labels".
    has_labels = labels is not None and len(labels) > 0

    # Resolve labels before the expensive t-SNE so a bad lookup fails fast.
    label_vector = [labels[x] for x in indices] if has_labels else None

    X_embedded = TSNE(n_components=2).fit_transform(X)
    dfr = pd.DataFrame(X_embedded, columns=['dim1', 'dim2'])

    if has_labels:
        dfr['labels'] = label_vector
        mapping = aes('dim1', 'dim2', color="labels")
    else:
        mapping = aes('dim1', 'dim2')

    print(dfr.head())
    gx = ggplot(dfr, mapping) + geom_point(size=0.5) + theme_bw()
    gx.draw()
    plt.show()
def plot_ambient_by_difference(adata, plot_name='cellbender_results'):
    """Plot per-gene counts removed by cellbender against the ambient signature.

    Adds three columns to ``adata.var`` (raw totals, cellbender totals and
    their difference) and writes two PNG files prefixed with ``plot_name``:
    a point scatter and the same scatter drawn with gene symbols as text.
    """
    raw_col = 'total_gene_counts_raw'
    cellbender_col = 'total_gene_counts_cellbender'
    removed_col = 'difference_total_gene_counts_raw_cellbender'

    # Total expression of each gene before and after cellbender.
    # ``adata.var`` is re-read on every access on purpose.
    adata.var[raw_col] = np.array(
        adata.layers['counts_raw'].sum(axis=0)).squeeze()
    adata.var[cellbender_col] = np.array(
        adata.layers['counts_cellbender'].sum(axis=0)).squeeze()
    adata.var[removed_col] = adata.var[raw_col] - adata.var[cellbender_col]

    def _scatter(layer):
        # Shared scaffold: data, theme, the supplied geom, then the labels.
        return (
            plt9.ggplot(adata.var)
            + plt9.theme_bw()
            + layer
            + plt9.labs(x='Ambient RNA signature',
                        y='Counts removed by cellbender',
                        title='Ambient RNA signature removal per gene')
        )

    # One point per gene.
    point_plot = _scatter(
        plt9.geom_point(
            plt9.aes(x='ambient_expression', y=removed_col),
            alpha=0.25))
    point_plot.save(
        '{}-ambient_signature-scatter.png'.format(plot_name),
        width=5,
        height=5)

    # Same scatter, drawn with the gene symbol instead of a point.
    name_plot = _scatter(
        plt9.geom_text(
            plt9.aes(x='ambient_expression', y=removed_col,
                     label='gene_symbols'),
            alpha=0.25))
    name_plot.save(
        '{}-ambient_signature-scatter_genenames.png'.format(plot_name),
        width=5,
        height=5)
def plot_mass(calculated_cell_mass, plot_every_nth_point):
    """
    Plot the calculated cell mass over time.

    Args:
        calculated_cell_mass (`pandas data frame`):
            Data frame whose first three columns are used as time, calculated
            cell mass and rolling-mean averaged cell mass. Its first column
            is renamed to 'Time (h)' in place.
        plot_every_nth_point (`int`):
            If 1 all data points are plotted. Otherwise every nth data point
            is used for plotting.

    Returns:
        p (`ggplot object`):
            Scatter of the second column with the third column overlaid as a
            red line.
    """
    columns = list(calculated_cell_mass)
    columns[0] = 'Time (h)'
    calculated_cell_mass.columns = columns

    # Thin the data after renaming so the plot sees the new time label.
    thinned = calculated_cell_mass.iloc[::plot_every_nth_point, :]

    time_col, mass_col, smoothed_col = columns[0], columns[1], columns[2]

    base = ggplot(aes(x=time_col, y=mass_col), data=thinned)
    raw_points = geom_point(alpha=0.1)
    smoothed_line = geom_line(aes(y=smoothed_col), color='red')
    return base + raw_points + smoothed_line + theme_bw()
def plot_categ_spatial(mod, adata, sample_col, color, n_columns=2,
                       figure_size=(24, 5.7), point_size=0.8, text_size=9):
    """Scatter a categorical value at image coordinates, one panel per sample.

    Args:
        mod: Not used by this function; kept so existing calls keep working.
        adata: Object whose ``obs`` table has ``imagecol``, ``imagerow`` and
            ``sample_col`` columns.
        sample_col: Name of the ``obs`` column used to facet the plot.
        color: Values stored in a ``color`` column and shown as an ordered
            categorical colour.
        n_columns: Number of facet columns.
        figure_size: Figure size passed to the plotnine theme.
        point_size: Point size.
        text_size: Font size of the facet strip text.

    Returns:
        The plotnine plot object.
    """
    # Copy explicitly: adding columns to a slice of ``adata.obs`` otherwise
    # triggers pandas' SettingWithCopyWarning.
    for_plot = adata.obs[["imagecol", "imagerow", sample_col]].copy()
    for_plot["color"] = color

    # fix types
    for_plot["color"] = pd.Categorical(for_plot["color"], ordered=True)
    for_plot["sample"] = pd.Categorical(for_plot[sample_col], ordered=False)
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    # Rows are negated so the plot's y axis runs in the opposite direction.
    for_plot["imagerow"] = -pd.to_numeric(for_plot["imagerow"])

    ax = (
        plotnine.ggplot(
            for_plot,
            plotnine.aes(x="imagecol", y="imagerow", color="color"))
        + plotnine.geom_point(size=point_size)
        + plotnine.coord_fixed()
        + plotnine.theme_bw()
        + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        )
        + plotnine.facet_wrap("~sample", ncol=n_columns)
        + plotnine.theme(figure_size=figure_size))
    return ax
def accPlot(accsByNFeats):
    """Plot accuracy against ``p`` for each set and return the plotted table.

    ``accsByNFeats`` is indexed as ``accsByNFeats[set][p]``; every such entry
    becomes one row with columns ``p``, ``acc`` and ``set``. The plot is
    printed; the long-format DataFrame is returned.
    """

    def _frame_for(set_name):
        # One single-row frame per p, stacked into a per-set table.
        single_rows = [
            pd.DataFrame(
                {"p": p, "acc": accsByNFeats[set_name][p], "set": set_name},
                index=[str(p)])
            for p in accsByNFeats[set_name]
        ]
        return pd.concat(single_rows, axis=0)

    ggd = pd.concat([_frame_for(s) for s in accsByNFeats])
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = (
        gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
        + gg.geom_line(alpha=0.5)
        + gg.geom_point()
        + gg.theme_bw()
        + gg.scale_x_log10(breaks=[10, 100, 1000, 10000])
        + gg.scale_color_manual(
            values=['darkgray', 'black', 'red', 'dodgerblue'])
        + gg.ylab('Accuracy (5-fold CV)')
    )
    print(ggo)
    return ggd
def facet_sweep_plot(base_plot: gg.ggplot,
                     sweep_vars: Sequence[str] = None,
                     tall_plot: bool = False) -> gg.ggplot:
    """Facet the plot by sweep_vars and size the figure to the facet count.

    When ``sweep_vars`` is given, the facet is added to ``base_plot`` in
    place; the returned plot additionally carries the theme settings.
    """
    df = base_plot.data.copy()

    if not sweep_vars:
        n_hypers = 1
    else:
        # Number of distinct hyperparameter combinations = number of panels.
        n_hypers = df[sweep_vars].drop_duplicates().shape[0]
        base_plot += gg.facet_wrap(sweep_vars, labeller='label_both')

    # Hand-picked sizes for the common small cases, a formula up to 12.
    preset_sizes = {1: (7, 5), 2: (13, 5), 4: (13, 8)}
    if n_hypers in preset_sizes:
        fig_size = preset_sizes[n_hypers]
    elif n_hypers <= 12:
        fig_size = (15, 4 * np.divide(n_hypers, 3) + 1)
    else:
        print('WARNING - comparing {} agents at once is more than recommended.'
              .format(n_hypers))
        fig_size = (15, 12)

    if tall_plot:
        width, height = fig_size
        fig_size = (width, height * 1.25)

    theme_settings = gg.theme_bw(base_size=18, base_family='serif') + gg.theme(
        figure_size=fig_size,
        panel_spacing_x=0.5,
        panel_spacing_y=0.5,
    )
    return base_plot + theme_settings
def plot_predict(forecast):
    """Plot observed values with the forecast and its band, and save a PNG.

    Args:
        forecast: DataFrame with the columns ``ds``, ``y``, ``yhat``,
            ``yhat_lower`` and ``yhat_upper``.

    Returns:
        The plotnine plot. It is also written to
        ``png/predict_pressure_chart.png`` next to this module.
    """
    p = (ggplot(data=forecast, mapping=aes(x='ds', y='y'))
         # Observations.
         + geom_point(colour='blue', alpha=0.3, na_rm=True)
         + geom_line(colour='blue', na_rm=True)
         # Forecast and its lower/upper band.
         + geom_line(data=forecast, mapping=aes(x='ds', y='yhat'),
                     colour='red')
         + geom_ribbon(data=forecast,
                       mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
                       fill='blue', alpha=0.1)
         + scale_x_datetime(breaks='1 days', date_labels='%y-%m-%d %H:%M')
         + xlab('Time')
         + ylab('Pressure')
         + theme_bw()
         + theme(axis_text_x=element_text(
             angle=45, hjust=1, face='bold', color='black'),
                 axis_text_y=element_text(face='bold', colour='black')))

    out_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png')
    # Saving fails if the target directory is missing, so create it first.
    os.makedirs(out_dir, exist_ok=True)
    p.save(filename='predict_pressure_chart.png',
           path=out_dir,
           width=8,
           height=6,
           units='in',
           dpi=326,
           verbose=False)
    return p
def make_sentiment_plot(sentiment_df, exclude_zero_bin=True, plot_text_labels=True):
    """Build a date-by-bin tile plot of how often each bin occurs.

    Every column named ``bin_<YYYY-MM-DD>`` is tallied; each (date, bin)
    count becomes one tile. With ``exclude_zero_bin`` the "0.0" bin and any
    non-string bin values are skipped. The plot is returned through
    ``plot_as_inline_html_data``.
    """
    print(
        "Sentiment plot: exclude zero bins? {} show text? {}".format(
            exclude_zero_bin, plot_text_labels
        )
    )

    records = []
    bin_columns = [c for c in sentiment_df.columns if c.startswith("bin_")]
    for column in bin_columns:
        date = column[4:]  # strip the "bin_" prefix
        tally = Counter(sentiment_df[column])
        for bin_name, val in tally.items():
            unwanted = not isinstance(bin_name, str) or bin_name == "0.0"
            if exclude_zero_bin and unwanted:
                continue
            records.append(
                {
                    "date": datetime.strptime(date, "%Y-%m-%d"),
                    "bin": str(bin_name),
                    "value": int(val),
                }
            )

    df = pd.DataFrame.from_records(records)

    # HACK TODO FIXME: should get from price_change_bins()...
    order = [
        "-1000.0", "-100.0", "-10.0", "-5.0", "-3.0", "-2.0", "-1.0",
        "-1e-06", "1e-06",
        "1.0", "2.0", "3.0", "5.0", "10.0", "25.0", "100.0", "1000.0",
    ]
    df["bin_ordered"] = pd.Categorical(df["bin"], categories=order)

    plot = (
        p9.ggplot(df, p9.aes("date", "bin_ordered", fill="value"))
        + p9.geom_tile(show_legend=False)
        + p9.theme_bw()
        + p9.xlab("")
        + p9.ylab("Percentage daily change")
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                   figure_size=(10, 5))
    )
    if plot_text_labels:
        plot = plot + p9.geom_text(p9.aes(label="value"), size=8, color="white")
    return plot_as_inline_html_data(plot)
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Density plot of pairwise correlations, filled by replicate status.

    Args:
        df: DataFrame with ``pairwise_correlation`` and ``replicate_info``
            columns.
        batch: Batch name, used in the title.
        plate: Plate name, used in the title.
        output_file_base: When truthy, the figure is handed to
            ``save_figure`` together with the options below.
        output_file_extensions: Extensions passed to ``save_figure``;
            defaults to ``[".png", ".pdf", ".svg"]``.
        dpi: Passed to ``save_figure``.
        height: Passed to ``save_figure``.
        width: Passed to ``save_figure``.

    Returns:
        The plotnine plot object.
    """
    # Build the default per call rather than sharing one mutable list
    # across calls through the signature.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        ))

    if output_file_base:
        save_figure(density_gg, output_file_base, output_file_extensions,
                    dpi, height, width)
    return density_gg
def plot_umap_well(embedding_df, fig_file, well_column):
    """Scatter the ``x``/``y`` embedding coloured by ``well_column``.

    The figure is saved to ``fig_file`` and the plot object is returned.
    """
    coordinates = gg.aes(x="x", y="y")
    colored_points = gg.geom_point(
        gg.aes(color=well_column), size=0.2, shape=".", alpha=0.2)

    well_gg = gg.ggplot(embedding_df, coordinates) + colored_points + gg.theme_bw()
    well_gg.save(filename=fig_file, height=4, width=5, dpi=500)
    return well_gg
class THEME:
    """Namespace of colour constants and pre-built plotnine theme/scale objects."""

    # Plain colour and loader settings.
    bgcolor = "#293241"
    LOADER_COLOR = "#2a9d8f"
    LOADER_TYPE = "dot"

    # Categorical palettes.
    colors_light = [
        "#d88c9a",
        "#f2d0a9",
        "#f1e3d3",
        "#99c1b9",
        "#8e7dbe",
        "#50514f",
        "#f25f5c",
        "#ffe066",
        "#247ba0",
        "#70c1b3",
        "#c97c5d",
        "#b36a5e",
    ]
    colors_dark = [
        "#e07a5f",
        "#3d405b",
        "#81b29a",
        "#2b2d42",
        "#f77f00",
        "#6d597a",
    ]

    # Black-and-white theme without the panel border.
    mt = theme_bw() + theme(panel_border=element_blank())

    # Scales built from the light palette (fill and line colour variants).
    cat_colors = scale_fill_manual(values=colors_light)
    cat_colors_lines = scale_color_manual(values=colors_light)
    gradient_colors = scale_fill_gradient("#ce4257", "#aad576")

    FILL = 1
    COLOR = 2
    LONG_FIGURE = (10, 20)
def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Scatter a method's real test statistic against its percentile rank.

    Points whose percentile rank exceeds 0.9 are redrawn in red and labelled
    with their index value. The figure is printed and saved as SVG to
    ``output_figure_filename``.
    """
    stat_col = "Test statistic"
    rank_col = "Percentile rank"

    # Format input dataframe
    real_stat_column = method_stats_dict[method_name] + " (Real)"
    plot_df = pd.DataFrame(
        data={
            stat_col: summary_df[real_stat_column].values,
            rank_col: summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    top_ranked = plot_df[plot_df[rank_col] > 0.9]
    # Index value for highly ranked rows, empty string for the rest.
    point_labels = [
        idx if plot_df.loc[idx, rank_col] > 0.9 else ""
        for idx in plot_df.index
    ]
    sans = "sans-serif"

    fig = (
        pn.ggplot(plot_df, pn.aes(x=stat_col, y=rank_col))
        + pn.geom_point()
        + pn.geom_point(
            top_ranked,
            pn.aes(x=stat_col, y=rank_col),
            color="red",
        )
        + pn.geom_text(
            pn.aes(label=point_labels),
            ha="left",
            va="top",
            size=5,
        )
        + pn.labs(
            x=x_label,
            y="Percentile of ranking",
            title=f"{method_name} pathway statistics vs ranking",
        )
        + pn.theme_bw()
        + pn.theme(
            legend_title_align="center",
            plot_background=pn.element_rect(fill="white"),
            legend_key=pn.element_rect(fill="white", colour="white"),
            legend_title=pn.element_text(family=sans, size=15),
            legend_text=pn.element_text(family=sans, size=12),
            plot_title=pn.element_text(family=sans, size=15),
            axis_text=pn.element_text(family=sans, size=12),
            axis_title=pn.element_text(family=sans, size=15),
        )
    )
    print(fig)

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
def plot_auc(read_file_1, read_file_2, plot_dir, save_file, generate_auc):
    """Draw violin plots of AUC per outcome for the two result files.

    ``read_file_1`` is labelled 'CPT specific' and ``read_file_2``
    'Aggregate'; both are read from ``dir_output``. The figure is saved as
    ``save_file`` under ``dir_figures/plot_dir``.
    """
    sub_path = os.path.join(dir_output, read_file_1)
    agg_path = os.path.join(dir_output, read_file_2)
    temp_sub = pd.read_csv(sub_path)
    temp_agg = pd.read_csv(agg_path)

    # Restrict the aggregate results with subset_agg, then recode outcomes.
    temp_agg = subset_agg(temp_sub=temp_sub, temp_agg=temp_agg)
    temp_agg = recode_outcome(temp_dat=temp_agg)
    temp_sub = recode_outcome(temp_dat=temp_sub)

    if generate_auc:
        temp_sub = get_auc(temp_sub)
        temp_agg = get_auc(temp_agg)

    # NOTE(review): rows with NA are dropped regardless of generate_auc here;
    # confirm this matches the intended extent of the branch above.
    temp_sub = temp_sub.dropna().reset_index(drop=True)
    temp_agg = temp_agg.dropna().reset_index(drop=True)

    # Tag each table with the model it came from.
    temp_sub.insert(0, 'model', 'CPT specific')
    temp_agg.insert(0, 'model', 'Aggregate')

    plot_output = os.path.join(dir_figures, plot_dir)

    dat = pd.concat([temp_agg, temp_sub], axis=0).reset_index(drop=True)

    violins = geom_violin(aes(draw_quantiles='auc'))
    img = (ggplot(dat, aes(x='outcome', y='auc', fill='model'))
           + violins
           + labs(x='Outcome', y='AUROC')
           + theme_bw())
    img.save(os.path.join(plot_output, save_file))
def scatter_plot(df, xcol, ycol, domain, xname=None, yname=None, log=False,
                 width=6, height=6, clamp=True, tickCount=5):
    """Scatter ``ycol`` against ``xcol`` on a square domain with guide lines.

    Args:
        df: DataFrame holding the two columns.
        xcol: Column plotted on the x axis.
        ycol: Column plotted on the y axis.
        domain: Two-element (lower, upper) limits applied to both axes.
        xname: x-axis label; defaults to ``xcol``.
        yname: y-axis label; defaults to ``ycol``.
        log: Use log10 scales instead of linear ones.
        width: Figure width.
        height: Figure height.
        clamp: Replace values above the upper limit with the limit itself
            (on a copy of ``df``) so they land on the rule lines.
        tickCount: Not used by this function; kept for interface
            compatibility.

    Returns:
        The plotnine plot object.
    """
    assert len(domain) == 2

    POINT_SIZE = 0.5
    DASH_PATTERN = (0, (3, 1))

    # Identity test, as scatter_plot2 does; ``== None`` would dispatch to a
    # custom __eq__ on the label object.
    if xname is None:
        xname = xcol
    if yname is None:
        yname = ycol

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    if clamp:  # clamp overflowing values if required
        df = df.copy(deep=True)
        df.loc[df[xcol] > domain[1], xcol] = domain[1]
        df.loc[df[ycol] > domain[1], ycol] = domain[1]

    # generate scatter plot
    scatter = p9.ggplot(df)
    scatter += p9.aes(x=xcol, y=ycol)
    scatter += p9.geom_point(size=POINT_SIZE, na_rm=True)
    scatter += p9.labs(x=xname, y=yname)

    if log:  # log scale
        scatter += p9.scale_x_log10(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_log10(limits=domain, labels=ax_formatter)
    else:
        scatter += p9.scale_x_continuous(limits=domain, labels=ax_formatter)
        scatter += p9.scale_y_continuous(limits=domain, labels=ax_formatter)

    scatter += p9.theme_bw()
    scatter += p9.theme(
        panel_grid_major=p9.element_line(color='#666666', alpha=0.5))
    scatter += p9.theme(figure_size=(width, height))

    # generate additional lines
    scatter += p9.geom_abline(intercept=0, slope=1,
                              linetype=DASH_PATTERN)  # diagonal
    scatter += p9.geom_vline(xintercept=domain[1],
                             linetype=DASH_PATTERN)  # vertical rule
    scatter += p9.geom_hline(yintercept=domain[1],
                             linetype=DASH_PATTERN)  # horizontal rule

    return scatter
def plot(solu, k):
    """Build one animation frame (row ``k`` of ``solu``) of the four bar mechanism."""
    print("Frame: ", k)
    sol = solu[k:k + 1]

    # Positions and accelerations of the nodes for this frame (complex
    # numbers: real part -> x, imaginary part -> y).
    pos_o4 = sol.Ro4[k]
    pos_a = sol.Ra[k]
    pos_p = sol.Rpa[k]
    pos_b = sol.Rba[k]
    acc_b = sol.Aba[k]
    acc_a = sol.Aa[k]
    acc_p = sol.Apaa[k]

    def link(x0, y0, x1, y1):
        # Straight bar between two nodes.
        return geom_segment(aes(x=x0, y=y0, xend=x1, yend=y1))

    def node(x, y):
        # Joint marker.
        return geom_point(aes(x=x, y=y), shape='o', size=3)

    def node_name(x, y, text):
        return annotate("text", x=x, y=y, label=text)

    def acc_arrow(pos, acc):
        # Red arrow from a node along its acceleration, scaled by ACC_SCALE.
        return geom_segment(
            aes(x=pos.real, y=pos.imag,
                xend=pos.real + acc.real * ACC_SCALE,
                yend=pos.imag + acc.imag * ACC_SCALE),
            colour='red', arrow=arrow())

    def acc_text(x, y, acc):
        # Text between '$ $' is rendered as LaTeX.
        return annotate("text", x=x, y=y,
                        label=f'${np.absolute(acc)/1000:.2f}~m/s^2$',
                        colour='red')

    layers = [
        # MAIN LINKAGE
        link(0, 0, pos_o4.real, pos_o4.imag),
        node(0, 0),
        node(pos_o4.real, pos_o4.imag),
        # 2ND LINKAGE
        link(0, 0, pos_a.real, pos_a.imag),
        node(pos_a.real, pos_a.imag),
        # AP LINKAGE
        link(pos_a.real, pos_a.imag, pos_p.real, pos_p.imag),
        node(pos_p.real, pos_p.imag),
        # 3RD LINKAGE
        link(pos_a.real, pos_a.imag, pos_b.real, pos_b.imag),
        node(pos_b.real, pos_b.imag),
        # 4TH LINKAGE (B is drawn a second time, as before)
        link(pos_b.real, pos_b.imag, pos_o4.real, pos_o4.imag),
        node(pos_b.real, pos_b.imag),
        # NODES IDENTIFICATION
        node_name(0, -20, "$O_1$"),
        node_name(pos_o4.real, pos_o4.imag - 20, "$O_4$"),
        node_name(pos_a.real + 10, pos_a.imag, "$A$"),
        node_name(pos_b.real + 20, pos_b.imag - 10, "$B$"),
        node_name(pos_p.real, pos_p.imag - 40, "$P$"),
        # ACCELERATION ARROWS (remove these to hide acceleration information)
        acc_arrow(pos_b, acc_b),
        acc_arrow(pos_a, acc_a),
        acc_arrow(pos_p, acc_p),
        # ACCELERATION TEXTS (remove these to hide acceleration information)
        acc_text(pos_b.real - 30, pos_b.imag + 10, acc_b),
        acc_text(pos_a.real + 20, pos_a.imag - 20, acc_a),
        acc_text(pos_p.real + 10, pos_p.imag + 20, acc_p),
        # TIME IDENTIFICATION
        annotate("label", x=120, y=-80,
                 label=f'Time: ${sol.time[k]:.2f}~s$', alpha=1),
        # NOTE(review): the flattened source does not show whether this labs()
        # line was active or commented out; it is kept active - confirm.
        labs(x='$x~[mm]$', y='$y~[mm]$'),
        # Fix the plot limits so the frame is no bigger than necessary.
        coord_cartesian(xlim=SCALE_X, ylim=SCALE_Y),
        theme_bw(),
    ]

    p = ggplot(sol)
    for layer in layers:
        p = p + layer
    return p
def plot_revigo(
        rev,
        outline=2,
        expand_points=(1.05, 1.2),
        figure_size=(8, 8),
        font_size=8,
        point_size=3,
        point_alpha=0.7,
        palette='RdPu',
        dispensability_cutoff=1.,
        show_all_labels=False,
        text_column='name',
        term_size_limit=None,
):
    """Scatter REVIGO terms in semantic space with a labelled subset.

    Args:
        rev: DataFrame with ``plot_X``, ``plot_Y``, ``neglog10``,
            ``frequency`` and ``text_column`` columns; ``eliminated``,
            ``dispensability`` and ``term_size`` are read when labels are
            filtered.
        outline: Line width of the white stroke drawn around label text.
        expand_points: ``expand_points`` option passed to adjust_text.
        figure_size: Figure size passed to the theme.
        font_size: Label font size.
        point_size: Not used; point size is mapped to ``frequency``.
        point_alpha: Point transparency.
        palette: Palette name for the sequential fill scale.
        dispensability_cutoff: Only terms below this dispensability are
            labelled (unless ``show_all_labels``).
        show_all_labels: Label every row of ``rev`` without filtering.
        text_column: Column holding the label text.
        term_size_limit: When set, only terms with a smaller ``term_size``
            are labelled (ignored with ``show_all_labels``).

    Returns:
        The plotnine plot object.
    """
    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # Honour ``outline``: the stroke width used to be fixed at 2 (the
    # parameter's default), so the argument had no effect.
    pe = [
        path_effects.Stroke(linewidth=outline, foreground='white'),
        path_effects.Normal()
    ]

    if not show_all_labels:
        lbl_df = rev[(rev.eliminated == 0)
                     & (rev.dispensability < dispensability_cutoff)]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]
    else:
        lbl_df = rev

    g = (p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev)
         + p9.geom_point(p9.aes(fill='neglog10', size='frequency'),
                         color='black', alpha=point_alpha)
         + p9.geom_text(p9.aes(label=text_column),
                        data=lbl_df,
                        size=font_size,
                        # All point positions are handed to adjust_text,
                        # not only the labelled ones.
                        adjust_text={
                            'expand_points': expand_points,
                            'arrowprops': {'arrowstyle': '-'},
                            'x': rev.plot_X.values,
                            'y': rev.plot_Y.values
                        },
                        path_effects=pe)
         + p9.theme_bw()
         + p9.scale_fill_distiller(type='seq', palette=palette, direction=1)
         + p9.labs(x='Semantic similarity space',
                   y='',
                   fill='-log10(adj. p-value)',
                   size='Term frequency')
         + p9.scale_size_continuous(range=(2, 7), trans='log10')
         + p9.theme(figure_size=figure_size,
                    axis_text_x=p9.element_blank(),
                    axis_text_y=p9.element_blank(),
                    axis_ticks=p9.element_blank()))
    return g
def plot_paired_ranking(
    method1_summary_df,
    method2_summary_df,
    method1_name,
    method2_name,
    output_figure_filename,
):
    """Scatter the simulated percentiles of two methods against each other.

    The two summaries are joined on their index, the figure is saved as SVG
    to ``output_figure_filename`` and then printed.
    """
    suffix1 = f"_{method1_name}"
    suffix2 = f"_{method2_name}"

    # Join on the index so both methods' rows are aligned.
    merged_summary_df = method1_summary_df.merge(
        method2_summary_df,
        left_index=True,
        right_index=True,
        suffixes=[suffix1, suffix2],
    )

    x_col = "Method1 ranking"
    y_col = "Method2 ranking"
    method1_percentile = merged_summary_df[
        f"Percentile (simulated)_{method1_name}"
    ].values
    method2_percentile = merged_summary_df[
        f"Percentile (simulated)_{method2_name}"
    ].values
    plot_df = pd.DataFrame(
        data={x_col: method1_percentile, y_col: method2_percentile},
        index=merged_summary_df.index,
    )

    sans = "sans-serif"
    fig = (
        pn.ggplot(plot_df, pn.aes(x=x_col, y=y_col))
        + pn.geom_point()
        + pn.labs(
            x=f"{method1_name} pathway ranking",
            y=f"{method2_name} pathway ranking",
            title=f"{method1_name} vs {method2_name} pathway ranking",
        )
        + pn.theme_bw()
        + pn.theme(
            legend_title_align="center",
            plot_background=pn.element_rect(fill="white"),
            legend_key=pn.element_rect(fill="white", colour="white"),
            legend_title=pn.element_text(family=sans, size=15),
            legend_text=pn.element_text(family=sans, size=12),
            plot_title=pn.element_text(family=sans, size=15),
            axis_text=pn.element_text(family=sans, size=12),
            axis_title=pn.element_text(family=sans, size=15),
        )
    )

    # Save first, then show.
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
    print(fig)
def plot_histogram(df_plot,
                   variable_column,
                   output_file='plot_distribution',
                   facet_column='none',
                   x_log10=False):
    """Plot a histogram of one column to ``<output_file>.png``.

    The input DataFrame is left untouched; plotting uses a copy carrying a
    scratch ``x`` column.

    Parameters
    ----------
    df_plot : pandas.DataFrame
        DataFrame with <variable_column> as a column.
    variable_column : string
        Name of the column to plot.
    output_file : string
        Basename of output file.
    facet_column : string
        Column to facet the plot by; 'none' disables faceting.
    x_log10 : bool
        Plot log10 of the values. If any value is exactly zero, 1e-10 is
        added to all values before taking the logarithm.

    Returns
    -------
    int
        0 after the plot is saved; 1 (nothing is plotted) when ``x_log10``
        is requested and the column contains negative values.
    """
    x_values = df_plot[variable_column]
    if x_log10:
        raw_values = x_values.values
        if np.any(raw_values < 0):
            return 1
        if np.any(raw_values == 0):
            # Small offset so zeros have a finite logarithm.
            x_values = np.log10(raw_values + 1e-10)
        else:
            x_values = np.log10(raw_values)
        variable_column = variable_column + ' (log10)'

    # assign() returns a new frame, so the caller's DataFrame does not gain
    # the scratch column.
    df_plot = df_plot.assign(x=x_values)

    gplt = plt9.ggplot(df_plot, plt9.aes(x='x'))
    gplt = gplt + plt9.theme_bw()
    gplt = gplt + plt9.geom_histogram(alpha=0.8)
    gplt = gplt + plt9.scale_x_continuous(minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(minor_breaks=0)
    gplt = gplt + plt9.labs(title='', x=variable_column)
    gplt = gplt + plt9.theme(axis_text_x=plt9.element_text(angle=-45, hjust=0))

    if facet_column != 'none':
        gplt = gplt + plt9.facet_wrap('~ {}'.format(facet_column), ncol=5)
        # Figure size grows with the number of facets.
        n_facets = df_plot[facet_column].nunique()
        gplt.save('{}.png'.format(output_file),
                  dpi=300,
                  width=6 * (n_facets / 4),
                  height=4 * (n_facets / 4),
                  limitsize=False)
    else:
        gplt.save('{}.png'.format(output_file), dpi=300, width=4, height=4)
    return 0
def scatter_plot2(df1, df2, xcol, ycol, domain, color1='black', color2='red',
                  xname=None, yname=None, log=False, width=6, height=6,
                  clamp=True, tickCount=5):
    """Overlay two scatter plots (with rugs) on a shared square domain.

    ``df1`` is drawn in ``color1`` and ``df2`` in ``color2``. With ``clamp``
    values above the upper domain limit are replaced by the limit on copies
    of the inputs. ``tickCount`` is accepted but not used.
    """
    assert len(domain) == 2

    point_size = 1.5
    dash_pattern = (0, (6, 2))
    upper = domain[1]

    xname = xcol if xname is None else xname
    yname = ycol if yname is None else yname

    # formatter for axes' labels
    ax_formatter = mizani.custom_format('{:n}')

    def _clamped(frame):
        # Deep copy so the caller's frame keeps its overflowing values.
        frame = frame.copy(deep=True)
        for col in (xcol, ycol):
            frame.loc[frame[col] > upper, col] = upper
        return frame

    if clamp:
        df1 = _clamped(df1)
        df2 = _clamped(df2)

    if log:
        make_x_scale, make_y_scale = p9.scale_x_log10, p9.scale_y_log10
    else:
        make_x_scale, make_y_scale = (p9.scale_x_continuous,
                                      p9.scale_y_continuous)

    components = [
        p9.aes(x=xcol, y=ycol),
        # points
        p9.geom_point(size=point_size, na_rm=True, color=color1, alpha=0.5),
        p9.geom_point(size=point_size, na_rm=True, data=df2, color=color2,
                      alpha=0.5),
        p9.labs(x=xname, y=yname),
        # rug plots
        p9.geom_rug(na_rm=True, sides="tr", color=color1, alpha=0.05),
        p9.geom_rug(na_rm=True, sides="tr", data=df2, color=color2,
                    alpha=0.05),
        # axes
        make_x_scale(limits=domain, labels=ax_formatter),
        make_y_scale(limits=domain, labels=ax_formatter),
        # theme
        p9.theme_bw(),
        p9.theme(panel_grid_major=p9.element_line(color='#666666', alpha=0.5)),
        p9.theme(panel_grid_minor=p9.element_blank()),
        p9.theme(figure_size=(width, height)),
        p9.theme(text=p9.element_text(size=24, color="black")),
        # diagonal, vertical rule, horizontal rule
        p9.geom_abline(intercept=0, slope=1, linetype=dash_pattern),
        p9.geom_vline(xintercept=upper, linetype=dash_pattern),
        p9.geom_hline(yintercept=upper, linetype=dash_pattern),
    ]

    scatter = p9.ggplot(df1)
    for component in components:
        scatter += component
    return scatter
def plot_save_rank(df_ranks, df_teams, year, week, show=False):
    """Plot the ranking iterations for each team

    :param df_ranks: data frame with team_id, and rankings for each iteration
    :param df_teams: data frame with team_id and owner info
    :param year: year for data
    :param week: current week
    :param show: flag to display the plot
    :return: data frame with columns team_id and lsq (normalized final rank)
    """
    # Plot each iteration
    df_ranks_lsq = pd.merge(df_teams[['team_id', 'firstName']],
                            df_ranks,
                            on='team_id')

    # Space out labels on x-axis according to final rankings
    df_ranks_lsq['label_x_pos'] = df_ranks_lsq.get(
        99).rank() * 100 / df_ranks_lsq.get(99).size

    # Convert to long format for plotting ease
    df_ranks_lsq_long = (df_ranks_lsq.rename({
        'ranks': '0'
    }, axis='columns').melt(id_vars=['team_id', 'firstName', 'label_x_pos']))

    # Convert iteration variable to int
    df_ranks_lsq_long.variable = df_ranks_lsq_long.variable.astype(int)

    # Make the plot
    p = (ggplot(aes(x='variable',
                    y='value',
                    color='factor(team_id)',
                    group='team_id'),
                data=df_ranks_lsq_long)
         + geom_line()
         + geom_label(aes(label='firstName',
                          x='label_x_pos',
                          y='value',
                          color='factor(team_id)'),
                      data=df_ranks_lsq_long[df_ranks_lsq_long.variable == 99],
                      size=10)
         + labs(x='Iteration', y='LSQ rank')
         + theme_bw()
         + guides(color=False))

    # Show plot
    if show:
        p.draw()

    # make dir if it doesn't exist already
    out_dir = Path(f'output/{year}/week{week}')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_name = out_dir / 'lsq_iter_rankings.png'

    # plotnine is throwing too many warnings. Silence them only while
    # saving; the context manager restores the caller's warning filters
    # instead of leaving a global 'default' filter behind.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        p.save(out_name, width=9, height=6, dpi=300)

    logger.info(f'Saved LSQ rankings plot to local file: {out_name.resolve()}')

    # Final rank per team: tanh(mean / 75) over the iterations above 70
    df_final_ranks = (df_ranks_lsq_long.query('variable>70').groupby([
        'team_id'
    ])[['value'
        ]].agg(lambda x: np.tanh(np.mean(x) / 75.)).reset_index().rename(
            {'value': 'lsq'}, axis=1))

    # Normalize by max score
    df_final_ranks['lsq'] = df_final_ranks.get('lsq') / df_final_ranks.get(
        'lsq').max()
    return df_final_ranks
def calc_tiers(df_ranks, year, week, bw=0.09, order=4, show=False):
    """Calculate up to 5 tiers using Gaussian Kernel Density Estimation

    :param df_ranks: data frame with power rankings for each team; a 'tier'
        column is added to it in place
    :param year: current year
    :param week: current week
    :param bw: bandwidth for KDE
    :param order: order parameter for the relative-minimum search
    :param show: flag to show plot
    :return: df_ranks with the added 'tier' column
    """
    logger.info('Calculating tiers for power rankings')

    # Estimate the kernel using power rankings
    kde = gaussian_kde(df_ranks.get('power'), bw_method=bw)

    # Create grid of points for plot
    x_grid = np.linspace(
        df_ranks.get('power').min() - 10.,
        df_ranks.get('power').max() + 10,
        df_ranks.get('power').size * 10)

    # Evaluate the density once; it feeds both the plot and the minima
    densities = kde(x_grid)
    df_kde = pd.DataFrame(dict(x=x_grid, kde=densities))

    # Calculate relative minimums to determine tiers
    rel_min = pd.DataFrame(
        dict(rel_min=x_grid[argrelmin(densities, order=order)[0]]))

    # Only keep 5 tiers (the 4 highest minima are the boundaries)
    tier_mins = sorted(rel_min.rel_min.values, reverse=True)[:4]

    # Find position of power rank when added to list of minimums to get tier
    df_ranks['tier'] = df_ranks.apply(lambda x: sorted(
        tier_mins + [x.power], reverse=True).index(x.power) + 1,
                                      axis=1)

    # Plot KDE and overlay tiers and actual power rankings as vertical lines
    tier_plot = (
        ggplot(aes(x='x', y='kde'), data=df_kde)
        + geom_line(size=1.5)
        + geom_vline(aes(xintercept='rel_min'),
                     data=rel_min,
                     color='red',
                     alpha=0.7)
        + geom_vline(aes(xintercept='power'),
                     data=df_ranks,
                     color='blue',
                     linetype='dashed',
                     alpha=0.4)
        + theme_bw()
        + labs(x='Power Rankings',
               y=f'KDE (bw: {bw}, order: {order})',
               title=f'Tiers for week {week}'))

    # Show plot
    if show:
        tier_plot.draw()

    # Create directory if it doesn't exist to save plot
    out_dir = Path(f'output/{year}/week{week}')
    out_dir.mkdir(parents=True, exist_ok=True)
    out_name = out_dir / 'tiers.png'

    # Save plot (plotnine is throwing too many warnings...). Silence them
    # only while saving; the context manager restores the caller's warning
    # filters instead of leaving a global 'default' filter behind.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        tier_plot.save(out_name, width=9, height=6, dpi=300)

    logger.info(f'Saved Tiers plot to local file: {out_name.resolve()}')
    return df_ranks
def plot_umap_cell_line(embedding_df, fig_file, cell_line_column,
                        color_labels, color_values):
    """Scatter the ``x``/``y`` embedding coloured by cell line.

    Colours and legend labels come from ``color_values`` and
    ``color_labels``. The figure is saved to ``fig_file`` and the plot
    object is returned.
    """
    colored_points = gg.geom_point(
        gg.aes(color=cell_line_column), size=0.2, shape=".", alpha=0.2)
    cell_line_scale = gg.scale_color_manual(
        name="Cell Line", labels=color_labels, values=color_values)

    cell_line_gg = (
        gg.ggplot(embedding_df, gg.aes(x="x", y="y"))
        + colored_points
        + gg.theme_bw()
        + cell_line_scale
    )
    cell_line_gg.save(filename=fig_file, height=4, width=5, dpi=500)
    return cell_line_gg
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Box plot (with jittered points) of similarity by replicate group.

    Args:
        df: DataFrame with ``group_replicate`` and ``similarity_metric``
            columns.
        batch: Batch name, used in the title.
        plate: Plate name, used in the title.
        facet_string: Facet specification; required when ``split_samples``.
        split_samples: Facet the plot by ``facet_string``.
        output_file_base: When truthy, the figure is handed to
            ``save_figure`` together with the options below.
        output_file_extensions: Extensions passed to ``save_figure``;
            defaults to ``[".png", ".pdf", ".svg"]``.
        dpi: Passed to ``save_figure``.
        height: Passed to ``save_figure``.
        width: Passed to ``save_figure``.
        return_plot: Return the plot object instead of None.

    Returns:
        The plotnine plot when ``return_plot`` is true, otherwise None.
    """
    # Build the default per call rather than sharing one mutable list
    # across calls through the signature.
    if output_file_extensions is None:
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric",
                   fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        assert facet_string, "To split samples, specify a facet_string"
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi,
            height, width
        )

    if return_plot:
        return correlation_gg
def plot_outcome_counts(read_file_1, read_file_2, save_file, plot_dir):
    """Bar chart of row counts per outcome and model for two result files.

    Both files are read from ``dir_output``; the figure is saved as
    ``save_file`` under ``dir_figures/plot_dir``.
    """
    temp_sub = pd.read_csv(os.path.join(dir_output, read_file_1))
    temp_agg = pd.read_csv(os.path.join(dir_output, read_file_2))

    temp_sub = recode_outcome(temp_sub)
    temp_agg = recode_outcome(temp_agg)

    plot_output = os.path.join(dir_figures, plot_dir)

    # Stack both tables, then count rows per (outcome, model) pair.
    stacked = pd.concat([temp_agg, temp_sub], axis=0).reset_index(drop=True)
    dat = stacked.groupby(['outcome', 'model']).size().reset_index(
        name='counts')

    bars = geom_bar(stat='identity', position='dodge')
    img = (
        ggplot(dat, aes(x='outcome', y='counts', fill='model'))
        + bars
        + labs(x='Outcome', y='Counts')
        + theme_bw()
    )
    img.save(os.path.join(plot_output, save_file))
def estimate_cutoffs_plot(output_file,
                          df_plt,
                          df_cell_estimate_cutoff,
                          df_fit=None,
                          scale_x_log10=False,
                          save_plot=True):
    """Plot UMI counts by sorted cell barcodes.

    Args:
        output_file: Basename of the PNG written when ``save_plot`` is true.
        df_plt: DataFrame with ``barcode`` and ``umi_counts`` columns. When
            its smallest count is <= 0 the column is shifted in place so the
            minimum becomes 1, because the y axis is log-scaled.
        df_cell_estimate_cutoff: DataFrame with ``n_cells`` and ``method``
            columns; one vertical line is drawn per row.
        df_fit: Optional DataFrame with ``x`` and ``y`` columns, drawn as a
            yellow line.
        scale_x_log10: Use a log10 x axis.
        save_plot: Save the figure to ``<output_file>.png``.

    Returns:
        The plotnine plot object.
    """
    min_umi = min(df_plt['umi_counts'])
    if min_umi <= 0:
        # Shift so the smallest value becomes 1. For a minimum of 0 this is
        # the previous +1; the previous ``min + 1`` shift moved negative
        # minima even further below zero.
        fix_log_scale = 1 - min_umi
        df_plt['umi_counts'] = df_plt['umi_counts'] + fix_log_scale

    gplt = plt9.ggplot()
    gplt = gplt + plt9.theme_bw()

    # Points for small inputs, a line for large ones.
    if len(df_plt) <= 50000:
        gplt = gplt + plt9.geom_point(
            mapping=plt9.aes(x='barcode', y='umi_counts'),
            data=df_plt,
            alpha=0.05,
            size=0.1)
    else:
        gplt = gplt + plt9.geom_line(
            mapping=plt9.aes(x='barcode', y='umi_counts'),
            data=df_plt,
            alpha=0.25,
            size=0.75,
            color='black')

    gplt = gplt + plt9.geom_vline(
        mapping=plt9.aes(xintercept='n_cells', color='method'),
        data=df_cell_estimate_cutoff,
        alpha=0.75,
        linetype='dashdot')
    gplt = gplt + plt9.scale_color_brewer(palette='Dark2', type='qual')

    if scale_x_log10:
        gplt = gplt + plt9.scale_x_continuous(
            trans='log10', labels=comma_labels, minor_breaks=0)
    else:
        gplt = gplt + plt9.scale_x_continuous(
            labels=comma_labels, minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(
        trans='log10', labels=comma_labels, minor_breaks=0)

    gplt = gplt + plt9.labs(title='',
                            y='UMI counts',
                            x='Barcode index, sorted by UMI count',
                            color='Cutoff')

    # Add the fit of the droplet utils model. ``if df_fit:`` raises for a
    # DataFrame (ambiguous truth value), so test for None/empty explicitly.
    if df_fit is not None and len(df_fit) > 0:
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='x', y='y'),
                                     data=df_fit,
                                     alpha=1,
                                     color='yellow')

    if save_plot:
        gplt.save('{}.png'.format(output_file), dpi=300, width=5, height=4)
    return gplt
def plot_fusion(self):
    """
    Save one tSNE scatter per reference, coloured by that reference's column.

    Only references from ``self.pos_dict`` that are columns of
    ``self.df_tsne`` are plotted; each goes to
    ``<out_prefix>_<ref>_fusion.pdf``. The global plotnine theme is set to
    ``theme_void`` as a side effect.
    """
    p9.theme_set(p9.theme_void())

    available = self.df_tsne.columns
    for ref in self.pos_dict:
        if ref not in available:
            continue
        out_plot_file = f'{self.out_prefix}_{ref}_fusion.pdf'
        plot = (
            p9.ggplot(self.df_tsne, p9.aes(x="tSNE_1", y="tSNE_2", color=ref))
            + p9.geom_point(size=0.2)
            + p9.theme_bw()
            + p9.scale_color_gradient(low="lightgrey", high="blue")
        )
        plot.save(out_plot_file)
def plot_replicates_greyscale(self):
    """Plot current over time per channel using a grey color scale.

    Some journals require greyscale graphs; this builds the line plot from
    ``self.data`` with ``scale_color_grey``, prints it and returns it.
    """
    import plotnine as p9

    mapping = p9.aes('Time', 'Current', color='Channel')
    grey_plot = p9.ggplot(self.data, mapping)
    grey_plot = grey_plot + p9.ylab(u'Current (μA)')
    grey_plot = grey_plot + p9.xlab('Time (seconds)')
    grey_plot = grey_plot + p9.geom_line()
    grey_plot = grey_plot + p9.theme_bw()
    grey_plot = grey_plot + p9.scale_color_grey()

    print(grey_plot)
    return grey_plot
def plot_base_temp(df):
    """Build the line-and-point plot of ``value`` against ``mos_since_start``.

    Each ``variable`` gets its own group, colour, shape and line type. The
    module-level ``aes_*`` objects supply the alpha value, scales, theme
    tweaks and x label.

    Returns:
        The assembled plotnine plot (not drawn or saved here).
    """
    mapping = p9.aes(
        x='mos_since_start',
        y='value',
        group='variable',
        colour='variable',
        shape='variable',
        linetype='variable',
    )
    return (
        p9.ggplot(df, mapping)
        + p9.geom_line(alpha=aes_color_alpha)
        + p9.geom_point(show_legend=True, alpha=aes_color_alpha)
        + aes_color
        + aes_glyphs
        + p9.theme_bw(base_size=9)
        + aes_fte_theme
        + aes_model_xlab
    )
def qq_plot(df, limit=20000):
    """Build a QQ plot of observed against expected -log10 values of ``P``.

    Rows are sorted by ``P`` ascending. ``OBS`` is ``-log10(P)`` and ``EXP``
    is ``-log10(rank / n)`` with ranks running from 1 to n. Only the first
    ``limit`` rows of the sorted frame are plotted.

    Args:
        df: Frame with a ``P`` column.
        limit: Maximum number of rows to plot.

    Returns:
        A plotnine plot with points and a reference ``geom_abline``.
    """
    ranked = df.sort_values('P')
    n_rows = len(ranked)
    expected = -np.log10(np.arange(1, n_rows + 1) / float(n_rows))
    plot_frame = ranked.assign(OBS=-np.log10(ranked['P']))
    plot_frame = plot_frame.assign(EXP=expected).head(limit)

    base = pn.ggplot(plot_frame, pn.aes(x='EXP', y='OBS'))
    return base + pn.geom_point() + pn.geom_abline() + pn.theme_bw()
def plot_result_stats(results, title):
    """Build a dodged column chart of the summary statistics of ``results``.

    ``results.describe()`` is reshaped to long form with ``metric``,
    ``group`` and ``value`` columns. The ``count``, ``min`` and ``max`` rows
    are dropped, and every bar is labelled with its value rounded to two
    decimals. The y axis is limited to [0, 1.0].

    Args:
        results: Object whose ``describe()`` output is plotted.
        title: Plot title.

    Returns:
        The assembled plotnine plot.
    """
    column_names = {"level_0": "metric", "level_1": "group", 0: "value"}
    long_stats = results.describe().unstack().reset_index()
    long_stats = long_stats.rename(columns=column_names)

    hidden_groups = ["count", "min", "max"]
    long_stats = long_stats[~long_stats["group"].isin(hidden_groups)]
    long_stats["value_presentation"] = long_stats["value"].round(2)

    bar_labels = p9.geom_text(
        p9.aes(label="value_presentation"),
        position=p9.position_dodge(width=0.9),
        va="bottom",
    )
    return (
        p9.ggplot(long_stats)
        + p9.aes("metric", "value", fill="group")
        + p9.geom_col(position="dodge")
        + p9.theme_bw()
        + p9.coord_cartesian(ylim=[0, 1.0])
        + p9.ggtitle(title)
        + bar_labels
    )
def theme_cognoma(fontsize_mult=1):
    """Return the cognoma plotnine theme: ``theme_bw`` plus overrides.

    Args:
        fontsize_mult: Multiplier applied to the base size (14), the axis
            text size (12) and the axis title sizes (13).

    Returns:
        The combined plotnine theme object.
    """
    import plotnine as gg

    dark_grey = "#4d4d4d"
    axis_title = gg.element_text(size=13 * fontsize_mult, color=dark_grey)

    base_theme = gg.theme_bw(base_size=14 * fontsize_mult)
    overrides = gg.theme(
        line=gg.element_line(color=dark_grey),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=dark_grey),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=dark_grey),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=dark_grey),
        axis_text=gg.element_text(size=12 * fontsize_mult, color=dark_grey),
        axis_title_x=axis_title,
        axis_title_y=gg.element_text(size=13 * fontsize_mult, color=dark_grey),
    )
    return base_theme + overrides
def accPlot(accsByNFeats):
    """Print a plot of accuracy against feature count, one line per set.

    Args:
        accsByNFeats: Nested mapping indexed first by set name and then by
            ``p``; each innermost value is converted to float and plotted
            as the accuracy for that ``p``.
    """
    def _set_frame(set_name):
        # One single-row frame per p, indexed by str(p), stacked per set.
        accs_by_p = accsByNFeats[set_name]
        rows = [
            DataFrame(
                {"p": p, "acc": accs_by_p[p], "set": set_name},
                index=[str(p)],
            )
            for p in accs_by_p
        ]
        return pd.concat(rows, axis=0)

    long_frame = pd.concat([_set_frame(set_name) for set_name in accsByNFeats])
    long_frame['acc'] = long_frame['acc'].astype(float)

    line_colors = ['darkgray', 'black', 'red', 'dodgerblue']
    figure = gg.ggplot(long_frame, gg.aes(x='p', y='acc', color='set'))
    for component in (
        gg.geom_line(alpha=0.5),
        gg.geom_point(),
        gg.theme_bw(),
        gg.scale_x_log10(breaks=[10, 100, 1000, 10000]),
        gg.scale_color_manual(values=line_colors),
        gg.ylab('Accuracy (5-fold CV)'),
    ):
        figure += component
    print(figure)
def test_theme_bw(self):
    """Check the ``theme_bw`` plot, with ``_theme`` added, equals 'theme_bw'.

    NOTE(review): the equality presumably compares against a stored
    baseline of that name; confirm against the test harness.
    """
    themed_plot = self.g + labs(title='Theme BW') + theme_bw()
    rendered = themed_plot + _theme
    assert rendered == 'theme_bw'
x['k'], x['resubAccuracy'], x['testAccuracy']) for x in repeatedKnnResults], columns = ['p', 'k', 'resubAccuracy', 'testAccuracy']) ggdata = pd.concat( [DataFrame({'p' : knnResultsSimplified.p, 'k' : knnResultsSimplified.k.apply(int), 'type' : 'resub', 'Accuracy' : knnResultsSimplified.resubAccuracy}), DataFrame({'p' : knnResultsSimplified.p, 'k' : knnResultsSimplified.k.apply(int), 'type' : 'test', 'Accuracy' : knnResultsSimplified.testAccuracy})], axis = 0 ) plt.close() ggo = gg.ggplot(ggdata, gg.aes(x='p', y='Accuracy', color='type', group='type', linetype='type')) ggo += gg.facet_wrap('~ k') ggo += gg.scale_x_log10() ggo += gg.geom_point(alpha=0.6) ggo += gg.stat_smooth() ggo += gg.theme_bw() print(ggo)