def analyze_kimura_distances(env, df):
    # type: (Environment, pd.DataFrame) -> None
    """Summarise per-query Kimura distances and run the Kimura analyses.

    Rows whose "Kimura-to-query" value is the string "[]" are dropped and the
    remaining values are parsed with ``ast.literal_eval``. Per-row mean,
    standard deviation, minimum and maximum are added as columns, two
    regression plots against "Genome GC" are drawn, and the enriched frame is
    handed to the other Kimura analysis functions.

    :param env: environment mapping; ``env["pd-work"]`` is passed to
        ``next_name`` to name the saved figures.
    :param df: input frame with at least the columns "Kimura-to-query",
        "Genome GC", "Ancestor" and "GCFID". The caller's frame is not
        modified; a filtered copy is used.
    """
    pd_work = env["pd-work"]

    # Keep only rows that have at least one distance, then parse the lists.
    df = df[df["Kimura-to-query"] != "[]"].copy()
    df["Kimura-to-query"] = df["Kimura-to-query"].apply(ast.literal_eval)
    df["Average-Kimura"] = df["Kimura-to-query"].apply(np.mean)
    df["Std-Kimura"] = df["Kimura-to-query"].apply(np.std)

    sns.lmplot(df, "Genome GC", "Average-Kimura", hue="Ancestor",
               sns_kwargs={
                   "scatter": False,
                   "lowess": True,
                   "scatter_kws": {"s": 5},
                   "palette": CM.get_map("ancestor"),
               },
               figure_options=FigureOptions(save_fig=next_name(pd_work)))

    # The frame holds a list-valued column ("Kimura-to-query"); restrict the
    # aggregation to numeric columns so the mean does not raise on pandas
    # versions that no longer drop non-numeric columns silently.
    df_mean = df.groupby(["Ancestor", "GCFID"],
                         as_index=False).mean(numeric_only=True)

    sns.lmplot(df_mean, "Genome GC", "Average-Kimura", hue="Ancestor",
               sns_kwargs={
                   "scatter": True,
                   "lowess": True,
                   "scatter_kws": {"s": 5},
                   "palette": CM.get_map("ancestor"),
               },
               figure_options=FigureOptions(save_fig=next_name(pd_work)))

    # Min/max kimura
    df["Min-Kimura"] = df["Kimura-to-query"].apply(min)
    df["Max-Kimura"] = df["Kimura-to-query"].apply(max)

    contour_kimura_per_ancestor(env, df)
    one_dim_Kimura_accuracy(env, df)
    kimura_dist_plot(env, df)
    heat_map_Kimura_accuracy(env, df, "Min-Kimura", "Max-Kimura",
                             balance=True,
                             xlabel="Minimum Kimura",
                             ylabel="Maximum Kimura")
    heat_map_Kimura_accuracy(env, df, "Average-Kimura", "Std-Kimura",
                             balance=False)
def analyze_gms2_components_on_verified_set(env, gil):
    # type: (Environment, GenomeInfoList) -> None
    """Run the per-genome component analysis and plot the error per component.

    The per-genome frames returned by
    ``analyze_gms2_components_on_verified_set_for_gi`` are concatenated, the
    "Genome" column is rewritten with ``fix_names``, the combined table is
    printed as CSV, and a bar plot of "Error" per "Genome" (hue "Component")
    is drawn.

    :param env: environment mapping; ``env["pd-work"]`` is passed to
        ``next_name`` to name the saved figure.
    :param gil: iterable of genome descriptors, one analysis run per item.
    """
    # run different components
    list_df = [
        analyze_gms2_components_on_verified_set_for_gi(env, gi) for gi in gil
    ]

    df = pd.concat(list_df, ignore_index=True, sort=False)
    df["Genome"] = df.apply(fix_names, axis=1)
    print(df.to_csv())

    fig, ax = plt.subplots(figsize=(12, 4))

    # Materialise the order as a list: a bare reversed(...) iterator can be
    # consumed only once and has no len(), which is fragile for a plot option.
    hue_order = list(reversed([
        "GMS2", "MGM2*", "Start Context", "RBS", "Start Codons", "Promoter",
        "MGM",
    ]))

    sns.barplot(df, "Genome", "Error", hue="Component", ax=ax,
                figure_options=FigureOptions(
                    save_fig=next_name(env["pd-work"])),
                sns_kwargs={
                    "hue_order": hue_order,
                    "palette": CM.get_map("gms2_components"),
                })
def kimura_dist_plot(env, df):
    """Overlay the "Average-Kimura" density of every ancestor on one axis.

    One curve is drawn per distinct value of ``df["Ancestor"]``, coloured via
    the "ancestor" colour map. The figure is saved under a name obtained from
    ``next_name(env["pd-work"])`` and then shown.
    """
    import seaborn
    import matplotlib.pyplot as plt

    clades = list(set(df["Ancestor"]))

    fig, axis = plt.subplots()  # type: plt.Figure, plt.Axes

    for clade in clades:
        clade_values = df[df["Ancestor"] == clade]["Average-Kimura"]
        seaborn.distplot(clade_values, ax=axis,
                         color=CM.get_map("ancestor")[clade],
                         hist=False, label=clade)

    # Legend entries follow the plotting order used in the loop above.
    axis.legend(clades)
    axis.set_ylabel("PDF")

    options = FigureOptions(save_fig=next_name(env["pd-work"]))
    save_figure(options)
    plt.show()
def main(env, args):
    # type: (Environment, argparse.Namespace) -> None
    """Plot the number of candidates against GC content.

    Reads the CSV at ``args.pf_input``, keeps rows with fewer than 100
    "Total Candidates", and writes three figures ("0.pdf", "1.pdf", "2.pdf")
    into ``env["pd-work"]``: a joint plot over all rows, and a scatter plot
    and a lowess regression plot over the per-(gcfid, ancestor) means.

    :param env: environment mapping; ``env["pd-work"]`` is the output directory.
    :param args: parsed arguments; only ``args.pf_input`` is read.
    """

    def _save_and_show(fig_num):
        # type: (int) -> None
        # Save the current figure as "<fig_num>.pdf" and display it.
        plt.savefig(os.path.join(env["pd-work"], "{}.pdf".format(fig_num)),
                    bbox_inches='tight')
        plt.show()

    df = pd.read_csv(args.pf_input, header=0)
    add_percentages(df)

    sns.set_context(context="paper", font_scale=1.5)

    # clean up
    df = df[df["Total Candidates"] < 100]

    palette = sns.xkcd_palette(
        ["windows blue", "amber", "faded green", "dusty purple"])
    sns.palplot(palette)
    plt.show()
    sns.set_palette(palette)

    # jointplot is a figure-level function that creates its own figure, so no
    # plt.figure() call precedes it (that only left a blank extra figure).
    sns.jointplot(x="gc", y="Total Candidates", data=df)
    _save_and_show(0)

    # Average number of candidates per GC
    df_tmp = df.groupby(["gcfid", "ancestor"], as_index=False).agg("mean")

    plt.figure(figsize=(12, 4))
    g = sns.scatterplot(x="gc", y="Total Candidates", data=df_tmp,
                        hue="ancestor", palette=CM.get_map("ancestor"))
    plt.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
    g.set(ylabel="Average number of candidates")
    g.set(xlabel="GC")
    _save_and_show(1)

    # Average number of candidates per GC (lowess fit per ancestor).
    # lmplot is figure-level as well; its size is controlled by `aspect`.
    g = sns.lmplot(x="gc", y="Total Candidates", data=df_tmp, hue="ancestor",
                   aspect=2, legend=False, ci=None, lowess=True,
                   palette=CM.get_map("ancestor"))
    plt.legend(loc='center left', bbox_to_anchor=(1.05, 0.5))
    g.set(ylabel="Average number of candidates")
    g.set(xlabel="GC")
    _save_and_show(2)
def analyze_upstream_distances(env, df):
    # type: (Environment, pd.DataFrame) -> None
    """Analyse prediction consistency as a function of upstream-gene distance.

    Rows whose "Upstream-distance" is the string "[]" are dropped, the lists
    are parsed, the most frequent distance per row is computed with
    ``most_frequent`` and two consistency columns ("PC(x,0)", "PC(x,3)") are
    added via ``compute_consistency``. A series of figures is then produced;
    those saved to disk go into ``<env["pd-work"]>/upstream_distances``.

    :param env: environment mapping; only ``env["pd-work"]`` is read.
    :param df: input frame; the columns read here are "Upstream-distance",
        "Support", "GMS2=SBSP", "NCBI", "(GMS2=SBSP)!=NCBI" and "Ancestor".
        The caller's frame is not modified.
    """
    import seaborn
    import matplotlib.pyplot as plt

    upstream = "Most frequent upstream"
    error_col = "(GMS2=SBSP)!=NCBI % GMS2=SBSP"

    pd_work = os_join(env["pd-work"], "upstream_distances")
    mkdir_p(pd_work)

    # remove empty lists
    df = df[df["Upstream-distance"] != "[]"].copy()
    df["Upstream-distance"] = df["Upstream-distance"].apply(ast.literal_eval)
    df[upstream] = df["Upstream-distance"].apply(most_frequent)

    # compute consistencies with different flexibilities (tuple, so that the
    # column creation order is fixed)
    for flexibility in (0, 3):
        df["PC(x,{})".format(flexibility)] = df[[
            upstream, "Upstream-distance"
        ]].apply(lambda r: compute_consistency(r["Upstream-distance"],
                                               r[upstream], flexibility),
                 axis=1)

    df = df[df["Support"] > 10].copy()

    # Both subsets below keep distances strictly inside (-50, 100).
    in_window = (df["Support"] > 10) & (df[upstream] < 100) & (df[upstream] > -50)
    df_tmp = df[in_window]

    # NCBI consistency as a func: from here on `df` holds only GMS2=SBSP rows
    df = df[(df["Support"] > 10) & (df["GMS2=SBSP"]) & (df[upstream] < 100)
            & (df[upstream] > -50)]

    df_tmp = stack_columns_as_rows(
        df_tmp[[upstream, "PC(x,0)", "PC(x,3)", "Ancestor"]],
        ["PC(x,0)", "PC(x,3)"], "PC(x,f)", None, label_col="Flexibility")

    sns.lmplot(df_tmp, upstream, "PC(x,f)", hue="Flexibility",
               sns_kwargs={"scatter": False, "lowess": True},
               figure_options=FigureOptions(save_fig=next_name(pd_work),
                                            xlim=[-7, None], ylim=[0, 1]))

    sns.distplot(df, upstream,
                 figure_options=FigureOptions(save_fig=next_name(pd_work)),
                 sns_kwargs={"kde": True})

    # Point plots of how often each distance in (-10, 10) occurs per clade:
    # first as a percentage within the clade, then as raw counts.
    near = df[(df[upstream] < 10) & (df[upstream] > -10)]
    for normalize, y_name, y_label in (
            (True, 'Percentage (by clade)', "Percent of components (by clade)"),
            (False, 'number', "Number of components"),
    ):
        counts = near.groupby("Ancestor")[upstream].value_counts(
            normalize=normalize)
        if normalize:
            counts = counts.mul(100)
        counts = counts.rename(y_name).reset_index()

        seaborn.catplot(data=counts, x=upstream, y=y_name, hue="Ancestor",
                        kind='point', scale=0.5, legend=False,
                        palette=CM.get_map("ancestor"), aspect=1.5)

        plt.legend(loc="best", title="Clade")
        figure_options = FigureOptions(
            save_fig=next_name(pd_work),
            xlabel="Most frequent distance to upstream gene",
            ylabel=y_label)
        plt.xlabel(figure_options.xlabel)
        plt.ylabel(figure_options.ylabel)
        save_figure(figure_options)
        plt.show()

    # Histogram (left axis) and KDE (right axis, ticks hidden) per ancestor.
    f, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    for ancestor, df_group in df.groupby("Ancestor"):
        seaborn.distplot(df_group[upstream], kde=False, ax=ax1)
        ax2.yaxis.set_ticks([])
        seaborn.kdeplot(df_group[upstream], ax=ax2)
        ax1.set_xlabel('x var')
        ax1.set_ylabel('Counts')
    plt.show()

    print(df[upstream].value_counts(normalize=True))

    for pc_column in ("PC(x,0)", "PC(x,3)"):
        sns.lmplot(df, upstream, pc_column, hue="Ancestor",
                   sns_kwargs={
                       "scatter": False,
                       "lowess": True,
                       "palette": CM.get_map("ancestor"),
                   },
                   figure_options=FigureOptions(save_fig=next_name(pd_work),
                                                xlim=[-7, None], ylim=[0, 1]))

    # NCBI sensitivity: summarise per GCFID within each distance range, then
    # average the per-GCFID summaries per ancestor.
    ranges = [(-5, 0), (0, 10), (10, 30), (30, 50), (50, 70)]
    list_collect = list()
    for r in ranges:
        r_filter = (df[upstream] >= r[0]) & (df[upstream] < r[1])

        df_summary_per_gcfid = get_summary_per_gcfid(df[r_filter])
        # numeric_only: the summary carries identifier columns (e.g. "GCFID")
        # that cannot be averaged.
        df_summary_per_gcfid = df_summary_per_gcfid.groupby(
            "Ancestor", as_index=False).mean(numeric_only=True)
        df_summary_per_gcfid["Range"] = str(r)
        list_collect.append(df_summary_per_gcfid)

    df_tmp = pd.concat(list_collect, sort=False)

    sns.catplot(df_tmp, "Range", error_col, hue="Ancestor", kind="point",
                sns_kwargs={"palette": CM.get_map("ancestor")})
    sns.catplot(df_tmp, "Range", "GMS2=SBSP", hue="Ancestor", kind="point",
                sns_kwargs={"palette": CM.get_map("ancestor")})

    # do not average per gcfid - average per ancestor
    list_collect = list()
    range_avgs = list()
    range_label = list()
    for r in ranges:
        r_filter = (df[upstream] >= r[0]) & (df[upstream] < r[1])
        df_r = df[r_filter]

        for ancestor, df_group in df_r.groupby(
                "Ancestor", as_index=False):  # type: str, pd.DataFrame
            f_gms2_eq_sbsp_with_ncbi_pred = (df_group["GMS2=SBSP"]) & (
                df_group["NCBI"])
            f_gms2_eq_sbsp_not_eq_ncbi = (f_gms2_eq_sbsp_with_ncbi_pred) & (
                df_group["(GMS2=SBSP)!=NCBI"])

            sensitivity = 100 * f_gms2_eq_sbsp_not_eq_ncbi.sum() / float(
                f_gms2_eq_sbsp_with_ncbi_pred.sum())
            list_collect.append({
                "Ancestor": ancestor,
                "Range": str(r),
                "range_avg": (r[1] + r[0]) / 2.0,
                error_col: sensitivity,
                "GMS2=SBSP": f_gms2_eq_sbsp_with_ncbi_pred.sum()
            })

        range_label.append(r)
        range_avgs.append((r[1] + r[0]) / 2.0)

    df_tmp = pd.DataFrame(list_collect)

    sns.catplot(df_tmp, "Range", error_col, hue="Ancestor", kind="point",
                sns_kwargs={"palette": CM.get_map("ancestor")})
    sns.catplot(df_tmp, "Range", "GMS2=SBSP", hue="Ancestor", kind="point",
                sns_kwargs={"palette": CM.get_map("ancestor")})

    ancestors = list(set(df_tmp["Ancestor"]))
    # squeeze=False keeps `axes` a 2-D array even for a single ancestor, so
    # that .ravel() below always works.
    fig, axes = plt.subplots(len(ancestors), 1, sharex="all", squeeze=False)
    for ancestor, ax in zip(ancestors, axes.ravel()):  # type: str, plt.Axes
        ax2 = ax.twinx()
        curr_df = df_tmp[df_tmp["Ancestor"] == ancestor]
        seaborn.lineplot(x="range_avg", y=error_col, data=curr_df, ax=ax)
        seaborn.lineplot(x="range_avg", y="GMS2=SBSP", data=curr_df,
                         color='r', legend=False, ax=ax2)
        ax.set_ylabel(None)
        ax2.set_ylabel(None)
        ax.set_xlabel("Range Average")

    plt.xticks(range_avgs, range_label)
    plt.show()

    # All ancestors on one pair of axes: error rate (left) and counts (right).
    fig, ax = plt.subplots()
    ax2 = ax.twinx()
    seaborn.lineplot(x="range_avg", y=error_col, data=df_tmp, ax=ax,
                     color="b", ci=None, hue="Ancestor")
    seaborn.lineplot(x="range_avg", y="GMS2=SBSP", data=df_tmp, ci=None,
                     color='r', legend=False, ax=ax2, hue="Ancestor")
    ax.set_ylim([0, None])
    ax2.set_ylim([0, None])

    ax.set_ylabel("NCBI 5' error rate vs GMS2=SBSP")
    ax2.set_ylabel("Number of GMS2=SBSP genes")

    ax.yaxis.label.set_color('b')
    ax2.yaxis.label.set_color('r')
    ax.set_xlabel("Distance to upstream gene (nt)")
    plt.show()
def _add_query_percentages(df, sort_column):
    # type: (pd.DataFrame, str) -> None
    """Add per-ancestor percentage and cumulative-percentage columns in place.

    For every "Ancestor" group, "Percentage-of-queries" is the row's share of
    the group's "Number-of-queries" (in percent) and
    "Cumulative-percentage-of-queries" is the running sum of those shares
    with the group ordered by ``sort_column``. The frame's index is reset
    (the previous index is kept as a column) so that rows can be addressed
    unambiguously.
    """
    # Float initial values: the columns receive float percentages below.
    df["Percentage-of-queries"] = 0.0
    df["Cumulative-percentage-of-queries"] = 0.0
    df.reset_index(inplace=True)

    for _, df_group in df.groupby("Ancestor", as_index=False):
        # Sort a copy rather than mutating the groupby slice in place.
        ordered = df_group.sort_values(sort_column)
        total = ordered["Number-of-queries"].sum()

        percentages = 100 * ordered["Number-of-queries"] / float(total)
        df.loc[ordered.index, "Percentage-of-queries"] = percentages
        # skipna=False: a missing percentage propagates to all later rows,
        # exactly as a manual running sum would.
        df.loc[ordered.index, "Cumulative-percentage-of-queries"] = \
            percentages.cumsum(skipna=False)


def one_dim_Kimura_accuracy(env, df_all, num_steps=20):
    # type: (Environment, pd.DataFrame, int) -> None
    """Plot accuracy and query counts against binned Kimura statistics.

    The data is binned with ``bin_data_one_d`` first on "Average-Kimura"
    (all rows) and then on "Std-Kimura" (rows with "Support" > 2). For each
    binning, line plots of "Accuracy", "Number-of-queries",
    "Percentage-of-queries" and "Cumulative-percentage-of-queries" per
    ancestor are drawn and saved under names from ``next_name``.

    :param env: environment mapping; ``env["pd-work"]`` is passed to
        ``next_name`` and ``env`` itself to ``bin_data_one_d``.
    :param df_all: frame holding the columns to bin plus "Support".
    :param num_steps: number of bins handed to ``bin_data_one_d``.
    """
    import matplotlib.pyplot as plt

    pd_work = env["pd-work"]

    # average
    df = bin_data_one_d(env, df_all, "Average-Kimura", num_steps)

    sns.lineplot(df, "Average-Kimura", "Accuracy", hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})
    sns.lineplot(df, "Average-Kimura", "Number-of-queries", hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})

    _add_query_percentages(df, "Average-Kimura")

    fig, ax = plt.subplots(figsize=(8, 4))
    sns.lineplot(df, "Average-Kimura", "Percentage-of-queries",
                 hue="Ancestor",
                 figure_options=FigureOptions(
                     save_fig=next_name(pd_work),
                     ylabel="Percentage of queries",
                     xlabel="Average Kimura"),
                 ax=ax, show=True, legend_loc="best",
                 sns_kwargs={"palette": CM.get_map("ancestor")})
    sns.lineplot(df, "Average-Kimura", "Cumulative-percentage-of-queries",
                 hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})

    # standard dev
    df = bin_data_one_d(env, df_all[df_all["Support"] > 2], "Std-Kimura",
                        num_steps)

    sns.lineplot(df, "Std-Kimura", "Accuracy", hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})
    sns.lineplot(df, "Std-Kimura", "Number-of-queries", hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})

    _add_query_percentages(df, "Std-Kimura")

    sns.lineplot(df, "Std-Kimura", "Percentage-of-queries", hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})
    sns.lineplot(df, "Std-Kimura", "Cumulative-percentage-of-queries",
                 hue="Ancestor",
                 figure_options=FigureOptions(save_fig=next_name(pd_work), ),
                 sns_kwargs={"palette": CM.get_map("ancestor")})
def viz_summary_per_gcfid(env, df, title=None):
    # type: (Environment, pd.DataFrame, Optional[str]) -> None
    """Draw the standard set of per-genome summary figures.

    Eleven figures are produced from the summary frame ``df`` (box plots per
    ancestor, then scatter and lowess plots against "Genome GC", "NCBI" and
    "GMS2=SBSP"). Every figure is saved under a fresh name from
    ``next_name(env['pd-work'])`` and carries ``title``.
    """
    out_dir = env['pd-work']
    error_col = "(GMS2=SBSP)!=NCBI % GMS2=SBSP"
    error_label = "1 - Sen(NCBI, GMS2=SBSP)"

    def fig_opts(**extra):
        # Fresh output name plus the shared title for each figure.
        return FigureOptions(save_fig=next_name(out_dir), title=title, **extra)

    def clade_palette():
        return {"palette": CM.get_map("ancestor")}

    def lowess_with_points():
        return {
            "palette": CM.get_map("ancestor"),
            "scatter": True,
            "lowess": True,
            "scatter_kws": {"s": 5},
        }

    # Box plots per ancestor
    sns.catplot(df, "Ancestor", "GMS2=SBSP % SBSP", kind="box",
                figure_options=fig_opts(ylim=[None, 100]),
                sns_kwargs=clade_palette())
    sns.catplot(df, "Ancestor", error_col, kind="box",
                figure_options=fig_opts(ylim=[0, 20], ylabel=error_label,
                                        xlabel="Clade"),
                sns_kwargs=clade_palette())

    # per GC
    sns.scatterplot(df, "Genome GC", error_col, hue="Ancestor",
                    figure_options=fig_opts(ylim=[0, None]),
                    legend_loc="best",
                    sns_kwargs=clade_palette())

    # per GC, lowess curve only
    sns.lmplot(df, "Genome GC", error_col, hue="Ancestor",
               figure_options=fig_opts(ylim=[0, None], ylabel=error_label),
               sns_kwargs={
                   "palette": CM.get_map("ancestor"),
                   "scatter": False,
                   "lowess": True,
               })

    # per GC, lowess curve with points on a wider figure
    wide_kwargs_opts = fig_opts(ylim=[0, None], ylabel=error_label)
    wide_kwargs = lowess_with_points()
    wide_kwargs["aspect"] = 1.5
    sns.lmplot(df, "Genome GC", error_col, hue="Ancestor",
               figure_options=wide_kwargs_opts,
               legend_loc="best",
               sns_kwargs=wide_kwargs)

    # Remaining lowess plots against GC share everything but column and ylim.
    for column, y_limits in (
            ("GMS2=SBSP", [0, None]),
            ("GMS2=SBSP % SBSP", [50, 100]),
            ("GMS2=SBSP % GMS2", [50, 100]),
    ):
        sns.lmplot(df, "Genome GC", column, hue="Ancestor",
                   figure_options=fig_opts(ylim=y_limits),
                   sns_kwargs=lowess_with_points())

    # Error column against gene counts, then the Prodigal error per GC.
    for x_column, y_column in (
            ("NCBI", error_col),
            ("GMS2=SBSP", error_col),
            ("Genome GC", "(GMS2=SBSP)!=Prodigal % GMS2=SBSP"),
    ):
        sns.scatterplot(df, x_column, y_column, hue="Ancestor",
                        figure_options=fig_opts(ylim=[0, None]),
                        sns_kwargs=clade_palette())
def viz_summary_per_gcfid_per_step(env, df):
    # type: (Environment, pd.DataFrame) -> None
    """Plot per-genome summaries for the cumulative steps A, A+B and A+B+C.

    Adds "Total SBSP", "Total GMS2" and "Total GMS2=SBSP" columns to ``df``
    (in place), builds one summary per cumulative step with
    ``get_summary_per_gcfid``, draws a 3x2 grid of line plots (left column
    SBSP, right column GMS2=SBSP), prints two summary tables and finally
    terminates the process with ``sys.exit()``.
    """
    import matplotlib.pyplot as plt

    out_dir = env['pd-work']
    tools = ("SBSP", "GMS2", "GMS2=SBSP")

    # total number of predictions per tool, per genome
    for _, genome_rows in df.groupby("GCFID", as_index=False):
        rows = genome_rows.index
        for tool in tools:
            df.loc[rows, "Total " + tool] = df.loc[rows, tool].sum()

    # summaries for steps A, A+B, A+B+C
    steps = ["A", "B", "C"]
    summaries = list()
    for position, step in enumerate(steps):
        summary = get_summary_per_gcfid(df[df["Predicted-at-step"] <= step])
        summary["SBSP Step"] = "+".join(steps[:position + 1])
        summaries.append(summary)

    per_step = pd.concat(summaries, sort=False)

    fig, axes = plt.subplots(3, 2, sharex="all", sharey="row")

    # (column, y label, y limits) for the three rows of each grid column
    panels = (
        (("Sen(SBSP,NCBI)", "Sensitivity", [85, 105]),
         ("Cov(SBSP,NCBI)", "Percent of Genes", [0, None]),
         ("SBSP", "Number of Genes", [0, None])),
        (("Sen(GMS2=SBSP,NCBI)", "Sensitivity", [85, 105]),
         ("Cov(GMS2=SBSP,NCBI)", "Percent of Genes", [0, None]),
         ("GMS2=SBSP", "Number of Genes", [0, None])),
    )

    for col_idx, column_panels in enumerate(panels):
        column_axes = axes[:, col_idx]
        for row_idx, (y_column, y_label, y_limits) in enumerate(column_panels):
            # Only the bottom-right panel keeps its legend for now; its
            # handles are reused for the figure-level legend below.
            keeps_legend = (col_idx == 1 and row_idx == 2)
            extra = {} if keeps_legend else {"legend": False}
            sns.lineplot(per_step, "SBSP Step", y_column, hue="GCFID",
                         ax=column_axes[row_idx],
                         sns_kwargs={"palette": CM.get_map("verified")},
                         figure_options=FigureOptions(ylabel=y_label,
                                                      ylim=y_limits),
                         **extra)
        if col_idx == 1:
            column_axes[2].get_legend().remove()
        fig.align_ylabels(column_axes)

    for axis in axes.ravel():
        axis.set_xlabel("Steps")

    axes[0][0].set_title("SBSP")
    axes[0][1].set_title("GMS2=SBSP")

    fig.subplots_adjust(bottom=0.21)

    # Legend entries come from the last axis of the grid (bottom right).
    handles, labels = axes.ravel()[-1].get_legend_handles_labels()
    labels[0] = "Genome"
    fig.legend(handles=handles, labels=labels, loc="lower center", ncol=3)

    plt.savefig(next_name(env["pd-work"]))
    plt.show()

    # Recompute totals, now counting only predictions on NCBI-flagged rows.
    for _, genome_rows in df.groupby("GCFID", as_index=False):
        rows = genome_rows.index
        for tool in tools:
            df.loc[rows, "Total " + tool] = (
                (genome_rows[tool]) & (genome_rows["NCBI"])).sum()

    df_all = get_summary_per_gcfid(df)

    for prefix in ("Sen", "Cov2"):
        table_columns = ["GCFID", "NCBI"] + [
            "{}({},NCBI)".format(prefix, tool) for tool in tools
        ]
        print(df_all[table_columns].to_string(index=False))

    # NOTE(review): this ends the whole program right after printing; it
    # looks like a debugging leftover - confirm before relying on it.
    import sys
    sys.exit()
def viz_per_genome(env, df):
    # type: (Environment, pd.DataFrame) -> None
    """Plot the number of BLASTp hits per genome and clade.

    Three kinds of figures are produced from the "BLAST" column: a
    per-clade categorical plot of the per-genome means, two line plots of
    the cumulative percentage of queries below a growing hit threshold
    (axes swapped in the second), and a 2x2 grid with one cumulative curve
    panel per ancestor restricted to thresholds up to 40.
    """

    def use_font_sizes(small, medium, bigger):
        # Apply serif/LaTeX text rendering with the given font sizes.
        matplotlib.rcParams.update({
            'font.family': 'serif',
            'text.usetex': True,
            'pgf.rcfonts': False,
            'font.size': small,  # controls default text sizes
            'axes.titlesize': small,  # fontsize of the axes title
            'axes.labelsize': medium,  # fontsize of the x and y labels
            'xtick.labelsize': small,  # fontsize of the tick labels
            'ytick.labelsize': small,  # fontsize of the tick labels
            'legend.fontsize': 12,  # legend fontsize
            'figure.titlesize': bigger,  # fontsize of the figure title
        })

    def sd_band_kwargs():
        return {"ci": "sd", "palette": CM.get_map("ancestor")}

    genome_means = df.groupby(["Genome", "Ancestor"], as_index=False).mean()

    sns.catplot(genome_means, "Ancestor", "BLAST",
                figure_options=FigureOptions(
                    save_fig=next_name(env["pd-work"]),
                    xlabel="Clade",
                    ylabel="Number of BLASTp Hits"),
                sns_kwargs={"palette": CM.get_map("ancestor")})

    # Hit thresholds: 0, 5, then growing by a factor of 1.2 (40 in total).
    thresholds = list()
    threshold = 0
    for _ in range(40):
        thresholds.append(threshold)
        threshold = 5 if threshold == 0 else threshold * 1.2

    # Per genome, the percentage of queries with fewer hits than each threshold.
    records = list()
    for _, genome_rows in df.groupby("Genome", as_index=False):
        first_row = genome_rows.index[0]
        genome = df.at[first_row, "Genome"]
        ancestor = df.at[first_row, "Ancestor"]
        num_queries = len(genome_rows)

        for limit in thresholds:
            below = len(genome_rows[genome_rows["BLAST"] < limit])
            records.append({
                "Genome": genome,
                "Ancestor": ancestor,
                "x": limit,
                "y": 100 * below / num_queries
            })

    cumulative = pd.DataFrame(records)

    use_font_sizes(16, 22, 24)

    sns.lineplot(cumulative, "x", "y", hue="Ancestor",
                 figure_options=FigureOptions(
                     xlabel="Number of BLASTp hits",
                     ylabel="Cumulative percentage of queries (per genome)",
                     save_fig=next_name(env["pd-work"]),
                 ),
                 legend_loc="best", legend_title="", legend_ncol=2,
                 sns_kwargs=sd_band_kwargs())

    sns.lineplot(cumulative, "y", "x", hue="Ancestor",
                 figure_options=FigureOptions(
                     ylabel="Number of BLASTp hits",
                     xlabel="Cumulative percentage of queries (per genome)",
                     save_fig=next_name(env["pd-work"]),
                 ),
                 legend_loc="best", legend_title="", legend_ncol=2,
                 sns_kwargs=sd_band_kwargs())

    use_font_sizes(14, 18, 20)

    fig, axes = plt.subplots(2, 2, sharex="all", sharey="all")

    for clade, panel in zip(sorted(set(df["Ancestor"])), axes.ravel()):
        clade_curve = cumulative[cumulative["Ancestor"] == clade]
        sns.lineplot(clade_curve[clade_curve["x"] <= 40], "x", "y",
                     hue="Ancestor", legend=None, ax=panel,
                     sns_kwargs=sd_band_kwargs())
        panel.set_title(clade)
        panel.set_xlabel("")
        panel.set_ylabel("")

    # Only the labels of these options are used; the file name consumed here
    # is not the one the figure is saved under (see fig.savefig below).
    figure_options = FigureOptions(
        xlabel="Number of BLASTp hits",
        ylabel="Cumulative percentage of\nqueries (per genome)",
        save_fig=next_name(env["pd-work"]),
    )

    # Invisible full-figure axis that only carries the shared axis labels.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(top=False, bottom=False, left=False, right=False,
                    which="both", labelbottom=False, labeltop=False,
                    labelleft=False, labelright=False)

    plt.xlabel(figure_options.xlabel, labelpad=30)
    plt.ylabel(figure_options.ylabel, labelpad=30)

    fig.savefig(next_name(env["pd-work"]), bbox_inches="tight")
    plt.show()
def viz_summary_per_gcfid_per_step(env, df):
    # type: (Environment, pd.DataFrame) -> None
    """Plot per-genome summaries for steps A, A+B, A+B+C and export tables.

    Adds "Total SBSP", "Total GMS2" and "Total GMS2=SBSP" columns to ``df``
    (in place), builds one summary per cumulative step with
    ``get_summary_per_gcfid``, draws a 3x2 grid of line plots (left column
    titled ``TOOL``, right column titled ``TOOLp``) and writes
    "sensitivity.csv" and "coverage.csv" into ``env["pd-work"]``.

    :param env: environment mapping; only ``env["pd-work"]`` is read.
    :param df: per-gene frame; the columns read here are "GCFID", "SBSP",
        "GMS2", "GMS2=SBSP", "NCBI" and "Predicted-at-step".
    """
    import matplotlib.pyplot as plt

    tools = ("SBSP", "GMS2", "GMS2=SBSP")

    # compute total number of predictions per tool, per genome
    for gcfid, df_group in df.groupby("GCFID", as_index=False):
        for tool in tools:
            df.loc[df_group.index, "Total " + tool] = \
                df.loc[df_group.index, tool].sum()

    # loop over steps A, A+B, and A+B+C and collect stats
    list_df = list()  # type: List[pd.DataFrame]
    tag = None
    for step in ["A", "B", "C"]:
        tag = step if tag is None else tag + "+" + step

        df_summary_per_gcfid = get_summary_per_gcfid(
            df[df["Predicted-at-step"] <= step])
        df_summary_per_gcfid["SBSP Step"] = tag
        list_df.append(df_summary_per_gcfid)

    df_per_gcfid_per_step = pd.concat(list_df, sort=False)

    fig, axes = plt.subplots(3, 2, sharex="all", sharey="row")

    # (column, y label, y limits) for the three rows of each grid column.
    # "\\%" spells the backslash explicitly; "\%" is an invalid escape.
    panels = (
        (("Sen(SBSP,NCBI)", "Error rate (\\%)", [0, 20]),
         ("Cov(SBSP,NCBI)", "Percentage\nof Genes", [0, None]),
         ("SBSP", "Number\nof Genes", [0, None])),
        (("Sen(GMS2=SBSP,NCBI)", "Error", [0, None]),
         ("Cov(GMS2=SBSP,NCBI)", "Percentage of Genes", [0, None]),
         ("GMS2=SBSP", "Number of Genes", [0, None])),
    )

    for col_idx, column_panels in enumerate(panels):
        column_axes = axes[:, col_idx]
        for row_idx, (y_column, y_label, y_limits) in enumerate(column_panels):
            # The bottom-right panel keeps its legend until its handles have
            # been drawn; every other panel is plotted without one.
            keeps_legend = (col_idx == 1 and row_idx == 2)
            legend_kwargs = {} if keeps_legend else {"legend": False}
            sns.lineplot(df_per_gcfid_per_step, "SBSP Step", y_column,
                         hue="GCFID", ax=column_axes[row_idx],
                         sns_kwargs={"palette": CM.get_map("verified")},
                         figure_options=FigureOptions(ylabel=y_label,
                                                      ylim=y_limits),
                         **legend_kwargs)
        if col_idx == 1:
            column_axes[2].get_legend().remove()
        fig.align_ylabels(column_axes)

    for ax in axes.ravel():
        ax.set_xlabel("Steps")

    axes[0][0].set_title(TOOL)
    axes[0][1].set_title(TOOLp)

    fig.subplots_adjust(bottom=0.21)

    # Build the shared legend from the bottom-right axis (the last one
    # visited by the loop above).
    handles, labels = axes[2][1].get_legend_handles_labels()
    labels[0] = "Genome"
    fig.legend(handles=handles, labels=labels, loc="lower center", ncol=3)

    plt.savefig(next_name(env["pd-work"]))
    plt.show()

    # Recompute the totals, counting only predictions on NCBI-flagged rows.
    for gcfid, df_group in df.groupby("GCFID", as_index=False):
        for tool in tools:
            df.loc[df_group.index, "Total " + tool] = (
                (df_group[tool]) & (df_group["NCBI"])).sum()

    df_all = get_summary_per_gcfid(df)

    # map column names for tables
    columns = [
        "GCFID", "NCBI", "Sen(SBSP,NCBI)", "Sen(GMS2,NCBI)",
        "Sen(GMS2=SBSP,NCBI)", "Cov2(SBSP,NCBI)", "Cov2(GMS2,NCBI)",
        "Cov2(GMS2=SBSP,NCBI)"
    ]
    table_columns = ["Genome", "Verified", "SBSP", "GMS2", "GMS2=SBSP"]

    df_sen = df_all[columns].rename(columns={
        "GCFID": "Genome",
        "NCBI": "Verified",
        "Sen(SBSP,NCBI)": "SBSP",
        "Sen(GMS2,NCBI)": "GMS2",
        "Sen(GMS2=SBSP,NCBI)": "GMS2=SBSP",
    })
    df_sen[table_columns].to_csv(
        os_join(env["pd-work"], "sensitivity.csv"), index=False)

    df_cov = df_all[columns].rename(columns={
        "GCFID": "Genome",
        "NCBI": "Verified",
        "Cov2(SBSP,NCBI)": "SBSP",
        "Cov2(GMS2,NCBI)": "GMS2",
        "Cov2(GMS2=SBSP,NCBI)": "GMS2=SBSP",
    })
    df_cov[table_columns].to_csv(
        os_join(env["pd-work"], "coverage.csv"), index=False)
def main(env, args):
    # type: (Environment, argparse.Namespace) -> None
    """Plot per-genome accuracy against chunk size for each tool in the data.

    Reads the CSV at ``args.pf_data``, divides its ``chunk-size`` column by
    1000, and draws one line per genome on a single axis: SBSP rows first
    (forced to a dashed style), then GMS2 rows, then MGM rows if the "Tool"
    column contains "MGM". When ``args.with_mgm`` is set, grey guide lines and
    an "MGM" annotation are added. The finished axis is saved to a file name
    obtained from ``next_name(env["pd-work"])`` and then shown.
    """
    import matplotlib.pyplot as plt

    x_col = "chunk-size"
    y_col = "percentage-common-3prime-and-5prime-from-common-3prime"

    data = pd.read_csv(args.pf_data)
    data[x_col] /= 1000

    fig, ax = plt.subplots()

    # SBSP curves; the figure options carry their own save path.
    sbsp_options = FigureOptions(
        xlabel="Chunk size (mb)",
        ylabel="Accuracy",
        ylim=[74, 101],
        save_fig=next_name(env["pd-work"]),
    )
    sns.lineplot(
        data[data["Tool"] == "SBSP"], x_col, y_col,
        hue="Genome",
        sns_kwargs={"palette": CM.get_map("verified"), "linestyle": "dashed"},
        ax=ax,
        legend=False,
        figure_options=sbsp_options,
    )

    # Only SBSP lines exist on the axis at this point: force them all dashed.
    for line in ax.lines:
        line.set_linestyle("--")

    # GMS2 curves keep the default line style and own the legend.
    sns.lineplot(
        data[data["Tool"] == "GMS2"], x_col, y_col,
        hue="Genome",
        sns_kwargs={"palette": CM.get_map("verified")},
        legend_loc="best",
        legend_ncol=2,
        ax=ax,
    )

    if args.with_mgm:
        # NOTE(review): matplotlib documents the 2nd/3rd positional arguments
        # of axvline/axhline as axes fractions in [0, 1]; the values passed
        # here look like data coordinates -- confirm the intended drawing.
        y_max = ax.get_ylim()[1]
        ax.axvline(50, 0, y_max, color="grey", linestyle="dashed")
        ax.axhline(74, 5, 49, color="grey", linestyle="dashed")
        ax.annotate("MGM", (5, 72))

    tools_present = set(data["Tool"])
    if "MGM" in tools_present:
        sns.lineplot(
            data[data["Tool"] == "MGM"], x_col, y_col,
            hue="Genome",
            sns_kwargs={"palette": CM.get_map("verified"), "linestyle": "-."},
            ax=ax,
            legend=False,
        )

        # Restyle the five most recently added lines as dotted.
        # NOTE(review): the count 5 is hard-coded; presumably one line per
        # genome -- confirm against the input data.
        for line in ax.lines[len(ax.lines) - 5:]:
            line.set_linestyle(":")

    # Re-apply labels/limits to the shared axis and write the final figure.
    final_options = FigureOptions(
        xlabel="Chunk size (mb)",
        ylabel="Accuracy",
        ylim=[74, 101],
        save_fig=next_name(env["pd-work"]),
    )
    FigureOptions.set_properties_for_axis(ax, final_options)
    plt.savefig(final_options.save_fig)
    plt.show()
def analyze_independent_predictions(max_candidates, sen_a, sen_b):
    # type: (int, float, float) -> None
    """Plot the computed probabilities for the three dependence conditions.

    Builds a table with ``compute_data`` for the "Random", "Independent" and
    "Fully dependent" conditions and renders line plots of its "Probability",
    "1 - Probability" and "Agree given prediction" columns. Output file names
    are obtained from ``next_name(".")``.

    :param max_candidates: forwarded to ``compute_data`` and
        ``plot_sensitivities_vs_num_candidates``.
    :param sen_a: forwarded to ``plot_sensitivities_vs_num_candidates`` only.
    :param sen_b: forwarded to ``plot_sensitivities_vs_num_candidates`` only.

    The remaining plots select rows with hard-coded values (sensitivity 0.9,
    25 candidates) rather than ``sen_a``/``sen_b``.
    """
    import matplotlib.pyplot as plt

    sensitivities = {
        "Random": sensitivity_random,
        "Independent": sensitivity_independent,
        "Fully dependent": sensitivity_fully_dependent,
    }

    agree_given_pred = {
        "Random": agree_given_pred_random,
        "Independent": agree_given_pred_independent,
        "Fully dependent": agree_given_pred_fully_dependent,
    }

    df = compute_data(sensitivities, agree_given_pred, max_candidates)
    plot_sensitivities_vs_num_candidates(sensitivities, max_candidates, sen_a, sen_b)

    def condition_palette():
        # Build a fresh kwargs dict per plot so no call shares state with another.
        return {"palette": CM.get_map("independence-conditions")}

    # Row masks reused by several plots. They depend only on columns that are
    # never modified below, so they stay valid after new columns are added.
    # NOTE(review): these are exact float comparisons; they rely on
    # compute_data emitting exactly these values -- confirm.
    both_at_09 = (df["Sensitivity A"] == 0.9) & (df["Sensitivity B"] == 0.9)
    equal_sensitivities = df["Sensitivity A"] == df["Sensitivity B"]
    equal_at_25 = equal_sensitivities & (df["Number of candidates"] == 25)

    sns.lineplot(
        df[both_at_09], "Number of candidates", "Probability",
        hue="Condition",
        sns_kwargs=condition_palette(),
        legend_loc="best",
        figure_options=FigureOptions(
            save_fig=next_name("."),
            ylabel=r"$P(y=s|x_1=y, x_2=y)$",
        ))

    # Complement of the probability plotted above.
    df["1 - Probability"] = 1 - df["Probability"]
    sns.lineplot(
        df[both_at_09], "Number of candidates", "1 - Probability",
        hue="Condition",
        sns_kwargs=condition_palette(),
        legend_loc="best",
        figure_options=FigureOptions(
            save_fig=next_name("."),
            ylabel=r"$P(y\neq s|x_1=y, x_2=y)$",
        ))

    # Side-by-side: probability vs. number of candidates, and vs. sensitivity.
    fig, axes = plt.subplots(1, 2, sharey="all", figsize=(10, 4))
    sns.lineplot(
        df[both_at_09], "Number of candidates", "Probability",
        hue="Condition",
        sns_kwargs=condition_palette(),
        ax=axes[0],
        legend=False,
        figure_options=FigureOptions(title="Sensitivity = 0.9"))
    sns.lineplot(
        df[equal_at_25], "Sensitivity A", "Probability",
        hue="Condition",
        ax=axes[1],
        sns_kwargs=condition_palette(),
        figure_options=FigureOptions(
            ylim=[0, 1.05],
            xlim=[0, 1],
            xlabel="Sensitivity",
            title="Number of candidates = 25",
        ))
    save_figure(FigureOptions(save_fig=next_name(".")), fig)
    plt.show()

    # One curve per sensitivity level for the "Independent" condition.
    # rename() returns a new frame; renaming the filtered selection in place
    # would be a chained assignment on a copy of ``df``.
    df_independent = df[
        equal_sensitivities
        & (df["Condition"] == "Independent")
        & (df["Sensitivity A"].isin({0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9}))
    ].rename(columns={"Sensitivity A": "Sensitivity"})

    sns.lineplot(
        df_independent, "Number of candidates", "Probability",
        hue="Sensitivity",
        figure_options=FigureOptions(
            title="Independent algorithms",
            save_fig=next_name(".")),
    )

    # Same side-by-side layout for the "Agree given prediction" column.
    fig, axes = plt.subplots(1, 2, sharey="all", figsize=(10, 4))
    sns.lineplot(
        df[both_at_09], "Number of candidates", "Agree given prediction",
        hue="Condition",
        sns_kwargs=condition_palette(),
        ax=axes[0],
        legend=False,
        figure_options=FigureOptions(title="Sensitivity = 0.9"))
    # NOTE(review): this title says "targets" while the matching plot above
    # says "candidates" -- confirm which wording is intended.
    sns.lineplot(
        df[equal_at_25], "Sensitivity A", "Agree given prediction",
        hue="Condition",
        ax=axes[1],
        sns_kwargs=condition_palette(),
        figure_options=FigureOptions(
            ylim=[0, 1.05],
            xlim=[0, 1],
            xlabel="Sensitivity",
            title="Number of targets = 25",
        ))
    save_figure(FigureOptions(save_fig=next_name(".")), fig)
    plt.show()
def plot_candidate_stops(env, df):
    # type: (Environment, pd.DataFrame) -> None
    """Plot candidate codons for the three stop codons TAA, TAG and TGA.

    Delegates to ``plot_candidate_codons`` with the "stops" colour map.
    """
    from sbsp_viz.colormap import ColorMap as CM

    stop_codons = ["TAA", "TAG", "TGA"]
    stop_colors = CM.get_map("stops")
    plot_candidate_codons(env, df, stop_codons, stop_colors)