Exemplo n.º 1
0
 def setUp(self) -> None:
     """Create a hue-split boxplot and an ``Annotator`` bound to it.

     Stores the data frame (``self.df``), the axes (``self.ax``), the
     annotator (``self.annotator``) and a list of three p-values
     (``self.pvalues``), the same length as the list of pairs.
     """
     # noinspection DuplicatedCode
     records = {}
     for color in ('blue', 'red'):
         for x_value, y_value in (("a", 15), ("a", 16), ("b", 17), ("b", 18)):
             # Rows are keyed 1..8: the four blue rows first, then the red.
             records[len(records) + 1] = {
                 'x': x_value, 'y': y_value, 'color': color}
     # The outer keys become columns, hence the transpose to get one row
     # per record.
     self.df = pd.DataFrame.from_dict(records).T

     plotting = dict(data=self.df, x="x", y="y", hue='color')
     self.ax = sns.boxplot(**plotting)

     pairs = [
         (("a", "blue"), ("a", "red")),
         (("b", "blue"), ("b", "red")),
         (("a", "blue"), ("b", "blue")),
     ]
     self.annotator = Annotator(self.ax, pairs=pairs, **plotting)
     self.pvalues = [0.03, 0.04, 0.9]
Exemplo n.º 2
0
 def test_wrong_plotter_engine(self):
     """Expect NotImplementedError matching "plotly" for engine="plotly"."""
     ax = sns.barplot(**self.plotting)
     pairs = [
         (("a", "blue"), ("a", "red")),
         (("b", "blue"), ("b", "red")),
         (("a", "blue"), ("b", "blue")),
     ]
     with self.assertRaisesRegex(NotImplementedError, "plotly"):
         self.annotator = Annotator(
             ax, pairs=pairs, plot="barplot", engine="plotly",
             **self.plotting)
Exemplo n.º 3
0
 def test_dodge_false_raises(self):
     """Expect ValueError matching "dodge" when dodge=False is passed."""
     ax = sns.barplot(dodge=False, **self.plotting)
     pairs = [
         (("a", "blue"), ("a", "red")),
         (("b", "blue"), ("b", "red")),
         (("a", "blue"), ("b", "blue")),
     ]
     with self.assertRaisesRegex(ValueError, "dodge"):
         self.annotator = Annotator(
             ax, pairs=pairs, plot="barplot", dodge=False,
             **self.plotting)
Exemplo n.º 4
0
 def test_fixed_offset(self):
     """Annotate a barplot with ``use_fixed_offset=True`` (smoke test)."""
     ax = sns.barplot(**self.plotting)
     pairs = [
         (("a", "blue"), ("a", "red")),
         (("b", "blue"), ("b", "red")),
         (("a", "blue"), ("b", "blue")),
     ]
     self.annotator = Annotator(
         ax, pairs=pairs, plot="barplot", **self.plotting)
     self.annotator.configure(use_fixed_offset=True, test="Mann-Whitney")
     self.annotator.apply_and_annotate()
Exemplo n.º 5
0
def boxplot_with_test(data, x, y, pairs):
    """Draw a boxplot of ``y`` by ``x`` annotated with Mann-Whitney p-values.

    For every entry of ``pairs`` the ``y`` values of the rows whose ``x``
    equals the entry's first and second element are compared with
    ``mannwhitneyu``. The resulting p-values are then set on an
    ``Annotator`` created for the boxplot axes and drawn.
    """
    def y_values_of(group):
        # Rows of ``data`` belonging to ``group``, restricted to column ``y``.
        return data[data[x] == group][y]

    pvalues = []
    for pair in pairs:
        result = mannwhitneyu(y_values_of(pair[0]), y_values_of(pair[1]))
        pvalues.append(result.pvalue)

    plotting_parameters = {'data': data, 'x': x, 'y': y}
    ax = sns.boxplot(**plotting_parameters)

    # Add annotations
    annotator = Annotator(ax, pairs, **plotting_parameters)
    annotator.set_pvalues(pvalues)
    annotator.annotate()
Exemplo n.º 6
0
 def test_orient_horizontal(self):
     """Annotate a horizontal, dodged stripplot (x and y swapped)."""
     plotting = dict(self.plotting)
     plotting.update(orient='h', x='y', y='x', dodge=True)
     ax = sns.stripplot(**plotting)

     pairs = [
         (("a", "blue"), ("a", "red")),
         (("b", "blue"), ("b", "red")),
         (("a", "blue"), ("b", "blue")),
     ]
     self.annotator = Annotator(
         ax, pairs=pairs, plot="stripplot", **plotting)
     self.annotator.configure(test="Mann-Whitney")
     self.annotator.apply_and_annotate()
Exemplo n.º 7
0
 def setUp(self) -> None:
     """Build the fixture: data frame, boxplot axes, quiet annotator, p-values.

     The annotator is created with ``verbose=False``; ``self.pvalues``
     holds three values, the same length as the list of pairs.
     """
     rows = [
         ("a", 15, "blue"), ("a", 16, "blue"),
         ("b", 17, "blue"), ("b", 18, "blue"),
         ("a", 15, "red"), ("a", 16, "red"),
         ("b", 17, "red"), ("b", 18, "red"),
     ]
     # Records are keyed 1..8 in the order listed above.
     records = {
         row_id: {"x": x_value, "y": y_value, "color": color}
         for row_id, (x_value, y_value, color) in enumerate(rows, start=1)
     }
     # The outer keys become columns, hence the transpose to get one row
     # per record.
     self.df = pd.DataFrame.from_dict(records).T

     plotting = dict(data=self.df, x="x", y="y", hue="color")
     self.ax = sns.boxplot(**plotting)

     pairs = [
         (("a", "blue"), ("a", "red")),
         (("b", "blue"), ("b", "red")),
         (("a", "blue"), ("b", "blue")),
     ]
     self.annotator = Annotator(
         self.ax, pairs=pairs, verbose=False, **plotting)
     self.pvalues = [0.03, 0.04, 0.9]
Exemplo n.º 8
0
 def test_plot_and_annotate_facets(self):
     """Map ``plot_and_annotate_facets`` over a FacetGrid split on the hue column."""
     annotator = Annotator(None, self.simple_pairs)

     # "data", "hue" and "hue_order" are removed from self.params_df, in
     # that order; whatever remains is forwarded as ``plot_params``.
     plot_params = self.params_df
     facet_data = plot_params.pop("data")
     facet_column = plot_params.pop("hue")
     g = sns.FacetGrid(
         facet_data, col=facet_column, height=10, sharey=False)
     plot_params.pop("hue_order")

     configuration = {'test': 'Mann-Whitney', 'text_format': 'simple'}
     g.map_dataframe(
         annotator.plot_and_annotate_facets,
         plot="boxplot",
         plot_params=plot_params,
         configuration=configuration,
         annotation_func='apply_test',
         ax_op_after=[['set_xlabel', ['Group'], None]],
         annotation_params={'num_comparisons': 'auto'},
     )
Exemplo n.º 9
0
 def test_order_in_x(self):
     """Expect a ValueError whose message matches "specified in `order`"."""
     expected_message = "(specified in `order`)"
     with self.assertRaisesRegex(ValueError, expected_message):
         self.annot = Annotator(
             self.ax, [(0, 2)], order=[0, 1, 2], data=self.data)
Exemplo n.º 10
0
 def test_unmatched_x_in_box_pairs_without_hue(self):
     """Expect a ValueError whose message matches "specified in `pairs`"."""
     unmatched_pairs = [(0, 2)]
     with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
         self.annot = Annotator(
             self.ax, unmatched_pairs, data=self.data)
Exemplo n.º 11
0
 def test_init_barplot(self):
     """Construct an Annotator on a barplot of ``self.data`` (smoke test)."""
     barplot_ax = sns.barplot(data=self.data)
     self.annot = Annotator(
         barplot_ax, [(0, 1)], data=self.data, plot="barplot")
Exemplo n.º 12
0
 def test_init_df(self):
     """Construct an Annotator from the data-frame plotting parameters."""
     plot_kwargs = self.params_df
     self.ax = sns.boxplot(**plot_kwargs)
     self.annot = Annotator(
         self.ax, pairs=self.pairs_for_df, **plot_kwargs)
Exemplo n.º 13
0
 def test_init_simple(self):
     """Construct an Annotator for the single pair (0, 1) on ``self.data``."""
     pairs = [(0, 1)]
     self.annot = Annotator(self.ax, pairs, data=self.data)
Exemplo n.º 14
0
 def test_comparisons_correction_by_name(self):
     """Run Mann-Whitney with the correction selected by the name "BH"."""
     self.ax = sns.boxplot(ax=self.ax, data=self.data2)
     pairs = [("X", "Y")]
     annot = Annotator(self.ax, pairs=pairs, data=self.data2)
     annot.configure(comparisons_correction="BH", test="Mann-Whitney")
     annot.apply_and_annotate()
Exemplo n.º 15
0
                # Keyword arguments shared by the box plot, the strip plot and
                # the Annotator below: column `ind` of `tmp` plotted against
                # 'classe_mortalidade', with categories in the order A, B, C.
                plotting_parameters = {
                    'data': tmp,
                    'x': 'classe_mortalidade',
                    'y': ind,
                    'order': ['A', 'B', 'C']
                }

                #tmp[['classe_mortalidade', i]].groupby('classe_mortalidade').mean()
                # One p-value per entry of `pairs`: stats.ttest_ind applied to
                # the `ind` values of the two classes named by the pair.
                pvalues = []
                for p in pairs:
                    pvalues.append(
                        stats.ttest_ind(
                            tmp.loc[tmp.classe_mortalidade == p[0], ind],
                            tmp.loc[tmp.classe_mortalidade == p[1],
                                    ind]).pvalue)

                # Text form of the p-values; within the visible code only the
                # commented-out set_custom_annotations call would use it.
                formatted_pvalues = [f"p={p:.2e}" for p in pvalues]
                # NOTE(review): `ax` is not passed to the seaborn calls below;
                # presumably get_log_ax() makes it the current axes - confirm.
                ax = get_log_ax()
                # The title shows the 'row m/z' and 'row retention time' values
                # looked up for `ind` in data[k][0].
                sns.boxplot(**plotting_parameters).set_title(
                    'mz:{:.2f} rt:{:.2f}'.format(*data[k][0].loc[
                        ind, ['row m/z', 'row retention time']].tolist()))
                sns.stripplot(**plotting_parameters)
                # Draw the precomputed p-values on `ax` for the same pairs.
                annotator = Annotator(ax, pairs, **plotting_parameters)
                annotator.set_pvalues(pvalues)
                #annotator.set_custom_annotations(formatted_pvalues)
                annotator.annotate()
                #plt.savefig("plot1A.png", bbox_inches='tight')
                # Save the current figure through `pdf`, then close it.
                pdf.savefig()
                plt.close()
Exemplo n.º 16
0
def main():
    """Plot transition rates per mutation type and virus with significance marks.

    Reads the pre-computed ``all_data.csv``, draws a log-scale boxen plot of
    "Transition rate" by "Mutation" (hue "Virus"), annotates it with
    Mann-Whitney tests (Bonferroni correction) comparing A>G and U>C against
    C>U and G>A within each virus, and saves the figure as a PNG inside a
    dated output directory.
    """
    date = datetime.date.today().strftime("%Y%m%d")

    # NOTE: all_data.csv was produced by a preprocessing step that used to
    # live here as commented-out code. It loaded the FITS mutation output of
    # each virus (RVB14 replicas #1-#3, CVB3, OPV2, PV1) with
    # post_data_mutation, tagged each table with a "Virus" column,
    # concatenated them, renamed "allele0_1" to "Transition rate" (cast to
    # float), rewrote the mutation labels ("AG" -> "A>G", ...), dropped the
    # ADAR-like / non-ADAR-like A>G rows and wrote the result to the CSV.

    # Plots - local
    all_data = pd.read_csv(
        "/Users/odedkushnir/PhD_Projects/fitness/all_data.csv")
    output_dir = "/Users/odedkushnir/PhD_Projects/fitness/{0}_fits_syn_plots".format(
        date)
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    plt.style.use('classic')
    sns.set_palette("Set2")

    mutation_order = ["C>U", "G>A", "U>C", "A>G"]
    virus_order = ["RVB14 #1", "RVB14 #2", "RVB14 #3", "CVB3", "OPV2", "PV1"]

    # The same data mapping is used for the plot and for the annotator.
    plot_kwargs = dict(
        x="Mutation",
        y="Transition rate",
        data=all_data,
        order=mutation_order,
        hue="Virus",
        hue_order=virus_order,
    )
    ax = sns.boxenplot(**plot_kwargs)
    ax.set_yscale("log")

    # Pair format: [((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]
    # Within every virus, A>G and then U>C are each compared with C>U and G>A.
    pairs = [
        ((focal, virus), (reference, virus))
        for focal in ("A>G", "U>C")
        for virus in virus_order
        for reference in ("C>U", "G>A")
    ]

    annotator = Annotator(ax, pairs, **plot_kwargs)
    annotator.configure(
        test='Mann-Whitney',
        text_format='star',
        loc='outside',
        comparisons_correction="Bonferroni",
    )
    annotator.apply_and_annotate()

    ax.set(xlabel="Type of mutation")
    ax.set(ylabel="Mutation rate inferred")
    ax.set_ylim(10**-10, 10**-1)
    ax.legend(
        loc='center left',
        bbox_to_anchor=(1.05, 0.5),
        borderaxespad=0.,
        fontsize=7,
    )

    figure_path = output_dir + "/%s_mutation_rate.png" % date
    plt.savefig(figure_path, dpi=600, bbox_inches='tight')
    plt.close()
Exemplo n.º 17
0
 def test_unmatched_x_in_box_pairs_with_hue(self):
     """Expect a ValueError matching "specified in `pairs`" for the "c" group."""
     pairs = [(("c", "blue"), ("b", "blue"))]
     plot_kwargs = dict(
         data=self.df, x="x", y="y", order=["a", "b"],
         hue='color', hue_order=['red', 'blue'])
     with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
         self.annot = Annotator(self.ax, pairs, **plot_kwargs)
Exemplo n.º 18
0
 def test_working_hue_orders(self):
     """Construct an Annotator with explicit ``order`` and ``hue_order``."""
     pairs = [(("a", "blue"), ("b", "blue"))]
     plot_kwargs = dict(
         data=self.df, x="x", y="y", order=["a", "b"],
         hue='color', hue_order=['red', 'blue'])
     self.annot = Annotator(self.ax, pairs, **plot_kwargs)
Exemplo n.º 19
0
def _adar_labels(frame, mutation, preference_column):
    """Return the "Mutation" values of *frame*, relabelling *mutation* rows by ADAR preference.

    Rows whose "Mutation" equals *mutation* and whose *preference_column* is
    "High", "Intermediate" or "Low" become "<level>\\nADAR-like\\n<mutation>";
    every other row keeps its current "Mutation" value.
    """
    is_mutation = frame["Mutation"] == mutation
    labels = frame["Mutation"]
    for level in ("High", "Intermediate", "Low"):
        labels = np.where(is_mutation & (frame[preference_column] == level),
                          "{0}\nADAR-like\n{1}".format(level, mutation),
                          labels)
    return labels


def _save_annotated_boxplot(data, hue, hue_order, palette, pairs, passage_order,
                            stats_path, figure_path, legend_anchor):
    """Draw a log-scale "Frequency" boxplot by passage, annotate it and save it.

    Welch t-tests with Bonferroni correction are applied to *pairs*; whatever
    ``Annotator.annotate`` prints is written to *stats_path* and the figure
    is saved to *figure_path*.
    """
    mapping = dict(x="passage", y="Frequency", data=data, hue=hue,
                   order=passage_order, hue_order=hue_order)
    ax = sns.boxplot(palette=palette, dodge=True, **mapping)
    ax.set_yscale('log')
    ax.set_ylim(10 ** -6, 10 ** -1)
    ax.set(xlabel="Passage", ylabel="Variant Frequency")

    annot = Annotator(ax, pairs, **mapping)
    annot.configure(test='t-test_welch', text_format='star', loc='outside', verbose=2,
                    comparisons_correction="Bonferroni")
    annot.apply_test()
    # Only the output printed while annotating is captured in the file.
    with open(stats_path, "w") as stats_file, contextlib.redirect_stdout(stats_file):
        annot.annotate()
    plt.legend(bbox_to_anchor=legend_anchor, loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(figure_path, dpi=300)
    plt.close()


def plots(input_dir, date, data_filter, virus, passage_order, transition_order, pairs, label_order, pairs_adar, filter_reads=None):
    """Produce the per-passage variant-frequency figures for one virus.

    Four figures are written to ``input_dir + date + "_plots"``: a point plot
    and an annotated box plot of the transition mutations, and the same two
    plots for synonymous mutations split by ADAR preference.

    Args:
        input_dir: Path prefix of the output directory.
        date: Date string used in the output directory name.
        data_filter: Data frame with at least the columns "passage",
            "frac_and_weight", "Frequency", "Mutation", "Type",
            "5`_ADAR_Preference" and "3`_ADAR_Preference" (plus "Prob",
            "no_variants" and "Read_count" when ``filter_reads`` is True).
            Its "passage" column is rewritten in place unless
            ``filter_reads`` is True, in which case a filtered copy is used.
        virus: Name appended to the output file names.
        passage_order: Order of the passages on the x axis.
        transition_order: Hue order for the transition-mutation plots.
        pairs: Pairs annotated on the transition box plot.
        label_order: Unused; kept so existing callers keep working.
        pairs_adar: Pairs annotated on the ADAR-preference box plot.
        filter_reads: When True, zero "no_variants" where "Prob" < 0.95 and
            keep only rows with "Read_count" > 10000.
    """
    output_dir = input_dir + date + "_plots"
    plus_minus = u"\u00B1"
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    if filter_reads is True:
        data_filter["no_variants"] = np.where(data_filter["Prob"] < 0.95, 0, data_filter["no_variants"])
        # Filter the rows. The previous code assigned the filtered frame to
        # the single "Read_count" column, which does not filter anything.
        data_filter = data_filter[data_filter["Read_count"] > 10000].copy()

    data_filter["passage"] = data_filter["passage"].astype(str)
    data_filter["passage"] = np.where(data_filter["passage"] != "RNA\nControl", "p" + data_filter["passage"], data_filter["passage"])

    # x and y are passed by keyword, as in the ADAR point plot below.
    g2 = sns.catplot(x="passage", y="frac_and_weight", data=data_filter, hue="Mutation", order=passage_order,
                     palette=mutation_palette(4), kind="point", dodge=0.5, hue_order=transition_order,
                     join=False, estimator=weighted_varaint, orient="v")
    g2.set_axis_labels("Passage", "Variant Frequency {} CI=95%".format(plus_minus))
    g2.set(yscale='log')
    g2.set(ylim=(10 ** -6, 10 ** -2))
    g2.savefig(output_dir + "/Transition_Mutations_point_plot_{0}".format(virus), dpi=300)
    plt.close()

    _save_annotated_boxplot(
        data_filter, "Mutation", transition_order, mutation_palette(4), pairs, passage_order,
        stats_path=output_dir + "/sts.csv",
        figure_path=output_dir + "/Transition_Mutations_box_stat_plot_{0}".format(virus),
        legend_anchor=(1.05, 0.5))

    # Work on an explicit copy so the column assignments below do not target
    # a slice of data_filter.
    data_filter_synonymous = data_filter.loc[data_filter.Type == "Synonymous"].copy()
    # A>G rows are relabelled first; "Mutation_adar" then adds the U>C labels
    # on top of the already relabelled "Mutation" column.
    data_filter_synonymous["Mutation"] = _adar_labels(
        data_filter_synonymous, "A>G", "5`_ADAR_Preference")
    data_filter_synonymous["Mutation_adar"] = _adar_labels(
        data_filter_synonymous, "U>C", "3`_ADAR_Preference")
    mutation_adar_order = ["High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
                           "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"]

    data_filter_synonymous["passage"] = data_filter_synonymous["passage"].astype(str)
    catplot_adar = sns.catplot(x="passage", y="frac_and_weight", data=data_filter_synonymous, hue="Mutation_adar",
                               order=passage_order, palette=mutation_palette(4, adar=True), kind="point", dodge=0.5,
                               hue_order=mutation_adar_order, join=False, estimator=weighted_varaint, orient="v",
                               legend=True)
    catplot_adar.set_axis_labels("Passage", "Variant Frequency {0} CI=95%".format(plus_minus))
    catplot_adar.set(yscale='log')
    catplot_adar.set(ylim=(10 ** -6, 10 ** -2))
    plt.savefig(output_dir + "/adar_pref_mutation_point_plot_{0}.png".format(virus), dpi=300)
    plt.close()

    _save_annotated_boxplot(
        data_filter_synonymous, "Mutation_adar", mutation_adar_order, mutation_palette(4, adar=True),
        pairs_adar, passage_order,
        stats_path=output_dir + "/sts_adar.csv",
        figure_path=output_dir + "/adar_pref_mutation_box_plot_{0}.png".format(virus),
        legend_anchor=(1.05, 1))
Exemplo n.º 20
0
def main():
    # input_dir = "/Users/odedkushnir/Projects/fitness/AccuNGS/190627_RV_CV/RVB14/"
    # input_dir = "/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/20201008RV-202329127/merged/patients/"
    input_dir = "/Users/odedkushnir/PhD_Projects/After_review/AccuNGS/RV/patients/"
    prefix = "inosine_predict_context_freq0.01"
    date = datetime.today().strftime("%Y%m%d")
    output_dir = input_dir + "{0}_{1}".format(date, prefix)
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    data_filter = pd.read_pickle(input_dir + prefix + "/data_filter.pkl")
    data_filter_ag = pd.read_pickle(input_dir + prefix + "/data_filter_ag.pkl")
    data_filter_uc = pd.read_pickle(input_dir + prefix + "/data_filter_uc.pkl")
    data_filter["label"] = np.where(
        data_filter["label"] == "RNA Control\nPrimer ID", "RNA\nControl",
        data_filter["label"])

    #Plots
    label_order = [
        "RNA\nControl", "p3 Cell Culture\nControl", "Patient-1", "Patient-4",
        "Patient-5", "Patient-9", "Patient-16", "Patient-17", "Patient-20"
    ]
    mutation_order = [
        "A>G", "U>C", "G>A", "C>U", "A>C", "U>G", "A>U", "U>A", "G>C", "C>G",
        "C>A", "G>U"
    ]
    transition_order = ["A>G", "U>C", "G>A", "C>U"]
    type_order1 = ["Synonymous", "Non-Synonymous", "Premature Stop Codon"]
    context_order = ["UpA", "ApA", "CpA", "GpA"]
    type_order2 = ["Synonymous", "Non-Synonymous"]
    context_order_uc = ["UpA", "UpU", "UpG", "UpC"]
    type_order_ag = ["Synonymous", "Non-Synonymous", "NonCodingRegion"]
    adar_preference = ["High", "Intermediate", "Low"]
    plus_minus = u"\u00B1"
    pairs = [(("RNA\nControl", "A>G"), ("RNA\nControl", "G>A")),
             (("p3 Cell Culture\nControl", "A>G"), ("p3 Cell Culture\nControl",
                                                    "G>A")),
             (("Patient-1", "A>G"), ("Patient-1", "G>A")),
             (("Patient-4", "A>G"), ("Patient-4", "G>A")),
             (("Patient-5", "A>G"), ("Patient-5", "G>A")),
             (("Patient-9", "A>G"), ("Patient-9", "G>A")),
             (("Patient-16", "A>G"), ("Patient-16", "G>A")),
             (("Patient-17", "A>G"), ("Patient-17", "G>A")),
             (("Patient-20", "A>G"), ("Patient-20", "G>A")),
             (("RNA\nControl", "A>G"), ("RNA\nControl", "U>C")),
             (("p3 Cell Culture\nControl", "A>G"), ("p3 Cell Culture\nControl",
                                                    "U>C")),
             (("Patient-1", "A>G"), ("Patient-1", "U>C")),
             (("Patient-4", "A>G"), ("Patient-4", "U>C")),
             (("Patient-5", "A>G"), ("Patient-5", "U>C")),
             (("Patient-9", "A>G"), ("Patient-9", "U>C")),
             (("Patient-16", "A>G"), ("Patient-16", "U>C")),
             (("Patient-17", "A>G"), ("Patient-17", "U>C")),
             (("Patient-20", "A>G"), ("Patient-20", "U>C")),
             (("RNA\nControl", "A>G"), ("RNA\nControl", "C>U")),
             (("p3 Cell Culture\nControl", "A>G"), ("p3 Cell Culture\nControl",
                                                    "C>U")),
             (("Patient-1", "A>G"), ("Patient-1", "C>U")),
             (("Patient-4", "A>G"), ("Patient-4", "C>U")),
             (("Patient-5", "A>G"), ("Patient-5", "C>U")),
             (("Patient-9", "A>G"), ("Patient-9", "C>U")),
             (("Patient-16", "A>G"), ("Patient-16", "C>U")),
             (("Patient-17", "A>G"), ("Patient-17", "C>U")),
             (("Patient-20", "A>G"), ("Patient-20", "C>U"))]

    # g1 = sns.catplot(x="label", y="frac_and_weight", data=data_filter, hue="Mutation", order=label_order, palette="tab20",
    #                     kind="point", dodge=True, hue_order=mutation_order, join=False, estimator=weighted_varaint,
    #                  orient="v")
    # g1.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    # g1.set_xticklabels(fontsize=9, rotation=90)
    # g1.set(yscale='log')
    # # g1.set(ylim=(10**-7, 10**-3))
    #
    # # plt.show()
    # g1.savefig(output_dir + "/All_Mutations_point_plot", dpi=300)
    # plt.close()
    g2 = sns.catplot(x="label",
                     y="frac_and_weight",
                     data=data_filter,
                     hue="Mutation",
                     order=label_order,
                     palette=mutation_palette(4),
                     kind="point",
                     dodge=0.5,
                     hue_order=transition_order,
                     join=False,
                     estimator=weighted_varaint,
                     orient="v",
                     legend=True)
    g2.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g2.set(yscale='log')
    g2.set(ylim=(10**-5, 10**-3))
    # g2.set_yticklabels(fontsize=12)
    g2.set_xticklabels(fontsize=10, rotation=90)
    # plt.show()
    # g2.savefig("/Users/odedkushnir/Google Drive/Studies/PhD/MyPosters/20190924 GGE/plots/Transition_Mutations_point_plot_RV", dpi=300)
    g2.savefig(output_dir + "/Transition_Mutations_point_plot", dpi=300)
    # g2.savefig("/Users/odedkushnir/Google Drive/Studies/PhD/Prgress reports/20200913 Final report/plots" +
    #                   "/Fig9a_Transition_Mutations_point_plot_Patients", dpi=300)
    plt.close()
    data_filter["label"] = data_filter["label"].astype(str)
    data_filter["Frequency"] = data_filter["Frequency"].astype(float)
    passage_g = sns.boxplot(x="label",
                            y="Frequency",
                            data=data_filter,
                            hue="Mutation",
                            order=label_order,
                            palette=mutation_palette(4),
                            dodge=True,
                            hue_order=transition_order)
    passage_g.set_yscale('log')
    passage_g.set_ylim(10**-6, 10**-1)
    passage_g.set(xlabel="", ylabel="Variant Frequency")
    passage_g.set_xticklabels(labels=label_order, fontsize=10, rotation=90)

    annot = Annotator(passage_g,
                      pairs,
                      x="label",
                      y="Frequency",
                      hue="Mutation",
                      data=data_filter,
                      order=label_order,
                      hue_order=transition_order)
    annot.configure(test='t-test_welch',
                    text_format='star',
                    loc='outside',
                    verbose=2,
                    comparisons_correction="Bonferroni")
    annot.apply_test()
    file_path = output_dir + "/sts.csv"
    with open(file_path, "w") as o:
        with contextlib.redirect_stdout(o):
            passage_g, test_results = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(output_dir + "/Transition_Mutations_box_stat_plot_patients",
                dpi=300)
    plt.close()

    # g_rna = sns.catplot(x="RNA", y="frac_and_weight", data=data_filter, hue="Mutation", order=rna_order,
    #                  palette="tab20", kind="point", dodge=True, hue_order=transition_order, join=False, estimator=weighted_varaint,
    #                  orient="v", legend=True)
    # g_rna.set_axis_labels("", "Variant Frequency")
    # g_rna.set(yscale='log')
    # g_rna.set(ylim=(10 ** -6, 10 ** -2))
    # # g2.set_yticklabels(fontsize=12)
    # g_rna.set_xticklabels(fontsize=10, rotation=45)
    # plt.show()
    # g2.savefig("/Users/odedkushnir/Google Drive/Studies/PhD/MyPosters/20190924 GGE/plots/Transition_Mutations_point_plot_RV", dpi=300)
    # g_rna.savefig(output_dir + "/Transition_Mutations_point_RNA_plot", dpi=300)
    # plt.close()

    # A>G Prev Context
    flatui = ["#3498db", "#9b59b6"]
    g5 = sns.catplot("label",
                     "frac_and_weight",
                     data=data_filter_ag,
                     hue="ADAR_like",
                     order=label_order,
                     palette=mutation_palette(2),
                     kind="point",
                     dodge=True,
                     hue_order=[True, False],
                     estimator=weighted_varaint,
                     orient="v",
                     col="Type",
                     join=False,
                     col_order=type_order2)
    g5.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g5.set(yscale='log')
    g5.set(ylim=(7 * 10**-7, 4 * 10**-3))
    g5.set_xticklabels(rotation=90)
    # plt.show()
    g5.savefig(output_dir + "/Context_point_plot", dpi=300)
    # g5.savefig("/Users/odedkushnir/Google Drive/Studies/PhD/Prgress reports/20200913 Final report/plots" +
    #            "/Fig9b_Context_point_plot_Patients", dpi=300)
    plt.close()

    mutation_ag = sns.catplot("label",
                              "frac_and_weight",
                              data=data_filter_ag,
                              hue="5`_ADAR_Preference",
                              palette=mutation_palette(3, adar=True, ag=True),
                              kind="point",
                              dodge=True,
                              estimator=weighted_varaint,
                              order=label_order,
                              orient="v",
                              col="Type",
                              join=False,
                              col_order=type_order_ag,
                              hue_order=adar_preference)
    mutation_ag.set(yscale="log")
    mutation_ag.set(ylim=(1 * 10**-5, 1 * 10**-2))
    mutation_ag.set_xticklabels(rotation=90)
    mutation_ag.fig.suptitle("A>G ADAR_like Mutation in RV patients", y=0.99)
    plt.subplots_adjust(top=0.85)
    mutation_ag.set_axis_labels(
        "", "Variant Frequency {} CI=95%".format(plus_minus))
    mutation_ag.savefig(output_dir + "/ag_ADAR_like_Mutation_col_patients.png",
                        dpi=300)
    plt.close()

    g6 = sns.catplot("label",
                     "frac_and_weight",
                     data=data_filter_ag,
                     hue="ADAR_like",
                     order=label_order,
                     palette=mutation_palette(2),
                     kind="point",
                     dodge=True,
                     hue_order=[True, False],
                     estimator=weighted_varaint,
                     orient="v",
                     join=False)
    g6.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g6.set(yscale='log')
    g6.set(ylim=(7 * 10**-7, 4 * 10**-3))
    g6.set_xticklabels(rotation=90)
    # plt.show()
    g6.savefig(output_dir + "/Context_point_all_mutations_type_plot", dpi=300)
    plt.close()

    g9 = sns.catplot("label",
                     "frac_and_weight",
                     data=data_filter_uc,
                     hue="Next",
                     order=label_order,
                     palette="tab20",
                     hue_order=context_order_uc,
                     estimator=weighted_varaint,
                     orient="v",
                     dodge=True,
                     kind="point",
                     col="Type",
                     join=False,
                     col_order=type_order2)
    g9.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g9.set(yscale='log')
    g9.set(ylim=(10**-5, 10**-2))
    g9.set_xticklabels(rotation=90)
    # plt.show()
    g9.savefig(output_dir + "/UC_Context_point_plot", dpi=300)
    plt.close()

    data_filter_ag_grouped = data_filter_ag.groupby(
        ["ADAR_like", "label",
         "Type"])["frac_and_weight"].agg(lambda x: weighted_varaint(x))
    data_filter_ag_grouped = data_filter_ag_grouped.reset_index()
    data_filter_ag_grouped = data_filter_ag_grouped.rename(
        columns={"frac_and_weight": "Frequency"})
    data_filter_ag_grouped["Frequency"] = data_filter_ag_grouped[
        "Frequency"].astype(float)
    print(data_filter_ag_grouped.to_string())

    data_filter_ag_grouped_silent = data_filter_ag_grouped[
        data_filter_ag_grouped["Type"] == "Synonymous"]
    data_filter_ag_grouped_silent = data_filter_ag_grouped_silent[
        data_filter_ag_grouped_silent["label"] == "Cell Cultureֿ\nControl"]
Exemplo n.º 21
0
 def test_unmatched_hue_in_hue_order(self):
     """A pair using a hue that is absent from `hue_order` must raise ValueError."""
     # The pairs reference the "blue" hue, which the hue_order below omits.
     pairs = [(("a", "blue"), ("b", "blue"))]
     annotator_kwargs = {
         "data": self.df,
         "x": "x",
         "y": "y",
         "order": ["a", "b"],
         "hue": 'color',
         "hue_order": ['red', 'yellow'],
     }
     expected_message = "(specified in `hue_order`)"
     with self.assertRaisesRegex(ValueError, expected_message):
         self.annot = Annotator(self.ax, pairs, **annotator_kwargs)
Exemplo n.º 22
0
 def test_not_implemented_plot(self):
     """Requesting the unknown plot type "thatplot" must raise NotImplementedError."""
     index_pairs = [(0, 1)]
     # Callable form of assertRaises: the Annotator constructor is invoked
     # by the assertion helper with the arguments that follow it.
     self.assertRaises(NotImplementedError, Annotator, self.ax, index_pairs,
                       data=self.data, plot="thatplot")
Exemplo n.º 23
0
 def test_init_df_inverted(self):
     """Build an Annotator from the dataframe pairs supplied in reverse order."""
     reversed_pairs = self.pairs_for_df[::-1]
     plot_kwargs = self.params_df
     # Draw the boxplot first so the Annotator receives the axes it lives on.
     self.ax = sns.boxplot(**plot_kwargs)
     self.annot = Annotator(self.ax, pairs=reversed_pairs, **plot_kwargs)
Exemplo n.º 24
0
 def test_valid_parameters_df_data_only(self):
     """Annotate a boxplot that was built from `data` alone (no x/y given)."""
     frame = self.data2
     self.ax = sns.boxplot(ax=self.ax, data=frame)
     annotator = Annotator(self.ax, pairs=[("X", "Y")], data=frame)
     # configure() returns an object exposing apply_and_annotate(); the
     # original chained the two calls, here they are split for readability.
     configured = annotator.configure(test="Mann-Whitney")
     configured.apply_and_annotate()
Exemplo n.º 25
0
def context_boxplot(context_loadings,
                    metadict,
                    included_factors=None,
                    group_order=None,
                    statistical_test='Mann-Whitney',
                    pval_correction='benjamini-hochberg',
                    text_format='star',
                    nrows=1,
                    figsize=(12, 6),
                    cmap='tab10',
                    title_size=14,
                    axis_label_size=12,
                    group_label_rotation=45,
                    ylabel='Context Loadings',
                    dot_color='lightsalmon',
                    dot_edge_color='brown',
                    filename=None,
                    verbose=False):
    '''
    Plots a boxplot to compare the loadings of context groups in each
    of the factors resulting from a tensor decomposition.

    Parameters
    ----------
    context_loadings : pandas.DataFrame
        Dataframe containing the loadings of each of the contexts
        from a tensor decomposition. Rows are contexts and columns
        are the factors obtained.

    metadict : dict
        A dictionary containing the groups where each of the contexts
        belong to. Keys corresponds to the indexes in `context_loadings`
        and values are the respective groups. For example:
        metadict={'Context 1' : 'Group 1', 'Context 2' : 'Group 1',
                  'Context 3' : 'Group 2', 'Context 4' : 'Group 2'}

    included_factors : list, default=None
        Factors to be included. Factor names must be the same as column elements
        in the context_loadings. If None, every column is plotted.

    group_order : list, default=None
        Order of the groups to plot the boxplots. Considering the
        example of the metadict, it could be:
        group_order=['Group 1', 'Group 2'] or
        group_order=['Group 2', 'Group 1']
        If None, groups are plotted in the order in which they are first
        found in `metadict`.

    statistical_test : str, default='Mann-Whitney'
        The statistical test to compare context groups within each factor.
        If None, no statistical annotation is drawn.
        Options include:
        't-test_ind', 't-test_welch', 't-test_paired', 'Mann-Whitney',
        'Mann-Whitney-gt', 'Mann-Whitney-ls', 'Levene', 'Wilcoxon', 'Kruskal'.

    pval_correction : str, default='benjamini-hochberg'
        Multiple test correction method to reduce false positives.
        Options include:
        'bonferroni', 'bonf', 'Bonferroni', 'holm-bonferroni', 'HB',
        'Holm-Bonferroni', 'holm', 'benjamini-hochberg', 'BH', 'fdr_bh',
        'Benjamini-Hochberg', 'fdr_by', 'Benjamini-Yekutieli', 'BY', None

    text_format : str, default='star'
        Format to display the results of the statistical test.
        Options are:

        - 'star', to display P- values < 1e-4 as "****"; < 1e-3 as "***";
                  < 1e-2 as "**"; < 0.05 as "*", and < 1 as "ns".
        - 'simple', to display P-values < 1e-5 as "1e-5"; < 1e-4 as "1e-4";
                  < 1e-3 as "0.001"; < 1e-2 as "0.01"; and < 5e-2 as "0.05".

    nrows : int, default=1
        Number of rows to generate the subplots. It is capped at the
        number of factors being plotted.

    figsize : tuple, default=(12, 6)
        Size of the figure (width*height), each in inches.

    cmap : str, default='tab10'
        Name of the color palette for coloring the major groups of contexts.

    title_size : int, default=14
        Font size of the title in each of the factor boxplots.

    axis_label_size : int, default=12
        Font size of the labels for X and Y axes.

    group_label_rotation : int, default=45
        Angle of rotation for the tick labels in the X axis.

    ylabel : str, default='Context Loadings'
        Label for the Y axis.

    dot_color : str, default='lightsalmon'
        A matplotlib color for the dots representing individual contexts
        in the boxplot. For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    dot_edge_color : str, default='brown'
        A matplotlib color for the edge of the dots in the boxplot.
        For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    filename : str, default=None
        Path to save the figure. If None, the figure is not saved.

    verbose : boolean, default=False
        Whether printing out the result of the pairwise statistical tests
        in each of the factors

    Returns
    -------
    fig : matplotlib.figure.Figure
        A matplotlib figure.

    axes : matplotlib.axes.Axes or array of Axes
           Matplotlib axes representing the subplots containing the boxplots.
           A single Axes is returned when only one subplot is created;
           otherwise the array is returned in 2-D form.

    Raises
    ------
    AssertionError
        If `group_order` is given but misses a group present in `metadict`.

    ValueError
        If there is no factor to plot.
    '''
    # Imported once here (instead of on every loop iteration) and kept local
    # so the function does not rely on a module-level import.
    from itertools import combinations

    if group_order is not None:
        # Every group used in `metadict` must have a position in the order.
        assert set(metadict.values()) <= set(group_order), (
            "All groups in `metadict` must be contained in `group_order`")
    else:
        # dict.fromkeys drops duplicates while keeping first-seen order,
        # unlike set(), whose iteration order is arbitrary.
        group_order = list(dict.fromkeys(metadict.values()))
    df = context_loadings.copy()

    if included_factors is None:
        factor_labels = list(df.columns)
    else:
        factor_labels = included_factors
    rank = len(factor_labels)
    if rank == 0:
        # Without this guard the grid computation below divides by zero.
        raise ValueError("At least one factor is required to plot boxplots")
    df['Group'] = [metadict[idx] for idx in df.index]

    nrows = min([rank, nrows])
    ncols = int(np.ceil(rank / nrows))
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=ncols,
                             figsize=figsize,
                             sharey='none')

    # plt.subplots squeezes its result: a 1x1 grid yields a bare Axes,
    # anything larger yields an ndarray. `axs` is always a flat array.
    if rank == 1:
        axs = np.array([axes])
    else:
        axs = axes.flatten()

    x = 'Group'
    for i, factor in enumerate(factor_labels):
        ax = axs[i]

        # Plot the boxes
        ax = sns.boxplot(x=x,
                         y=factor,
                         data=df,
                         order=group_order,
                         whis=[0, 100],
                         width=.6,
                         palette=cmap,
                         boxprops=dict(alpha=.5),
                         ax=ax)

        # Plot the dots
        sns.stripplot(x=x,
                      y=factor,
                      data=df,
                      size=6,
                      order=group_order,
                      color=dot_color,
                      edgecolor=dot_edge_color,
                      linewidth=0.6,
                      jitter=False,
                      ax=ax)

        if statistical_test is not None:
            # Add annotations about statistical test: every pair of groups
            # is compared within the current factor.
            pairs = list(combinations(group_order, 2))
            annotator = Annotator(ax=ax,
                                  pairs=pairs,
                                  data=df,
                                  x=x,
                                  y=factor,
                                  order=group_order)
            annotator.configure(test=statistical_test,
                                text_format=text_format,
                                loc='inside',
                                comparisons_correction=pval_correction,
                                verbose=verbose)
            annotator.apply_and_annotate()

        ax.set_title(factor, fontsize=title_size)

        ax.set_xlabel('', fontsize=axis_label_size)
        # Only the first subplot of each row carries the Y label.
        if i % ncols == 0:
            ax.set_ylabel(ylabel, fontsize=axis_label_size)
        else:
            ax.set_ylabel(' ', fontsize=axis_label_size)

        ax.set_xticklabels(ax.get_xticklabels(),
                           rotation=group_label_rotation,
                           rotation_mode='anchor',
                           va='bottom',
                           ha='right')

    # Remove extra subplots (grid cells beyond the number of factors)
    for unused_ax in axs[rank:]:
        unused_ax.axis(False)

    # A single subplot is a bare Axes with no `.shape`; only arrays are
    # reshaped. Note that a 1-D array of k Axes becomes shape (k, 1); this
    # is kept as-is so the returned shape does not change for callers.
    if isinstance(axes, np.ndarray) and axes.shape[0] > 1:
        axes = axes.reshape(axes.shape[0], -1)
        fig.align_ylabels(axes[:, 0])

    # Operate on `fig` explicitly rather than on pyplot's current figure.
    fig.tight_layout(rect=[0, 0.03, 1, 0.99])
    if filename is not None:
        fig.savefig(filename, dpi=300, bbox_inches='tight')
    return fig, axes
Exemplo n.º 26
0
def main():
    # input_dir = "/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/20201008RV-202329127/merged/passages/"
    """Local"""
    input_dir = "/Users/odedkushnir/PhD_Projects/After_review/AccuNGS/RV/passages/"
    prefix = "inosine_predict_context"
    date = datetime.today().strftime("%Y%m%d")
    output_dir = input_dir + "{0}_{1}".format(date, prefix)
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    data_filter = pd.read_pickle(input_dir + prefix + "/data_filter.pkl")
    data_filter_ag = pd.read_pickle(input_dir + prefix + "/data_filter_ag.pkl")
    data_filter_uc = pd.read_pickle(input_dir + prefix + "/data_filter_uc.pkl")
    data_filter["passage"] = data_filter["passage"].astype(int)
    data_filter["no_variants"] = np.where(data_filter["Prob"] < 0.95, 0,
                                          data_filter["no_variants"])
    data_filter["Read_count"] = data_filter[data_filter["Read_count"] > 10000]

    #Plots
    label_order = [
        "RNA Control\nRND", "RNA Control\nPrimer ID", "p2-1", "p2-2", "p2-3",
        "p5-1", "p5-2", "p5-3", "p8-1", "p8-2", "p8-3", "p10-2", "p10-3",
        "p12-1", "p12-2", "p12-3"
    ]
    mutation_order = [
        "A>G", "U>C", "G>A", "C>U", "A>C", "U>G", "A>U", "U>A", "G>C", "C>G",
        "C>A", "G>U"
    ]
    transition_order = ["A>G", "U>C", "G>A", "C>U"]
    type_order = ["Synonymous", "Non-Synonymous", "Premature Stop Codon"]
    type_order_ag = ["Synonymous", "Non-Synonymous"]
    context_order = ["UpA", "ApA", "CpA", "GpA"]
    context_order_uc = ["UpU", "UpA", "UpC", "UpG"]
    adar_preference = ["High", "Intermediate", "Low"]
    plus_minus = u"\u00B1"

    # g1 = sns.catplot(x="label", y="frac_and_weight", data=data_filter, hue="Mutation", order=label_order,
    #                  palette="Set2",
    #                  kind="point", dodge=False, hue_order=mutation_order, join=True, estimator=weighted_varaint,
    #                  orient="v")
    # g1.set_axis_labels("", "Variant Frequency")
    # g1.set_xticklabels(fontsize=9, rotation=45)
    # g1.set(yscale='log')
    # g1.set(ylim=(10 ** -7, 10 ** -3))
    #
    # # plt.show()
    # g1.savefig(output_dir + "/All_Mutations_point_plot", dpi=300)
    # plt.close()
    #
    # g2 = sns.catplot(x="label", y="frac_and_weight", data=data_filter, hue="Mutation", order=label_order,
    #                  palette=mutation_palette(4), kind="point", dodge=True, hue_order=transition_order, join=False,
    #                  estimator=weighted_varaint,
    #                  orient="v", legend=True)
    # g2.set_axis_labels("", "Variant Frequency")
    # g2.set(yscale='log', ylim=(10 ** -6, 10 ** -2), xlim=(0, 12, 2))
    # # g2.set_yticklabels(fontsize=12)
    # g2.set_xticklabels(fontsize=9, rotation=90)
    # plt.show()
    # g2.savefig("/Users/odedkushnir/Google Drive/Studies/PhD/MyPosters/20190924 GGE/plots/Transition_Mutations_point_plot_RV", dpi=300)
    # g2.savefig(output_dir + "/Transition_Mutations_point_plot", dpi=300)
    # plt.close()
    replica_lst = [1, 2, 3]
    for replica in replica_lst:
        data_filter_replica = data_filter[data_filter["replica"] == replica]
        data_filter_replica["passage"] = data_filter_replica["passage"].astype(
            str)
        data_filter_replica["passage"] = "p" + data_filter_replica["passage"]
        if replica == 2:
            data_filter_replica = pd.read_pickle(input_dir + prefix +
                                                 "/data_filter.pkl")
            data_filter_replica["passage"] = data_filter_replica[
                "passage"].astype(int)
            data_filter_replica["no_variants"] = np.where(
                data_filter_replica["Prob"] < 0.95, 0,
                data_filter_replica["no_variants"])
            data_filter_replica["Read_count"] = data_filter_replica[
                data_filter_replica["Read_count"] > 10000]
            data_filter_replica["passage"] = data_filter_replica[
                "passage"].astype(str)
            data_filter_replica[
                "passage"] = "p" + data_filter_replica["passage"]
            data_filter_replica["replica"] = np.where(
                data_filter_replica["passage"] == "p0", 2,
                data_filter_replica["replica"])
            data_filter_replica = data_filter_replica[
                data_filter_replica["replica"] == replica]
        data_filter_replica["passage"] = np.where(
            data_filter_replica["passage"] == "p0", "RNA\nControl",
            data_filter_replica["passage"])

        if replica == 1:
            passage_order = ["RNA\nControl", "p2", "p5", "p8", "p12"]
            pairs = [(("RNA\nControl", "A>G"), ("RNA\nControl", "G>A")),
                     (("p2", "A>G"), ("p2", "G>A")),
                     (("p5", "A>G"), ("p5", "G>A")),
                     (("p8", "A>G"), ("p8", "G>A")),
                     (("p12", "A>G"), ("p12", "G>A")),
                     (("RNA\nControl", "A>G"), ("RNA\nControl", "U>C")),
                     (("p2", "A>G"), ("p2", "U>C")),
                     (("p5", "A>G"), ("p5", "U>C")),
                     (("p8", "A>G"), ("p8", "U>C")),
                     (("p12", "A>G"), ("p12", "U>C")),
                     (("RNA\nControl", "A>G"), ("RNA\nControl", "C>U")),
                     (("p2", "A>G"), ("p2", "C>U")),
                     (("p5", "A>G"), ("p5", "C>U")),
                     (("p8", "A>G"), ("p8", "C>U")),
                     (("p12", "A>G"), ("p12", "C>U"))]
            pairs_adar = [(("RNA\nControl", "High\nADAR-like\nA>G"),
                           ("RNA\nControl", "Low\nADAR-like\nA>G")),
                          (("p2", "High\nADAR-like\nA>G"),
                           ("p2", "Low\nADAR-like\nA>G")),
                          (("p5", "High\nADAR-like\nA>G"),
                           ("p5", "Low\nADAR-like\nA>G")),
                          (("p8", "High\nADAR-like\nA>G"),
                           ("p8", "Low\nADAR-like\nA>G")),
                          (("p12", "High\nADAR-like\nA>G"),
                           ("p12", "Low\nADAR-like\nA>G")),
                          (("p2", "High\nADAR-like\nU>C"),
                           ("p2", "Low\nADAR-like\nU>C")),
                          (("p5", "High\nADAR-like\nU>C"),
                           ("p5", "Low\nADAR-like\nU>C")),
                          (("p8", "High\nADAR-like\nU>C"),
                           ("p8", "Low\nADAR-like\nU>C")),
                          (("p12", "High\nADAR-like\nU>C"),
                           ("p12", "Low\nADAR-like\nU>C"))]
        else:
            passage_order = ["RNA\nControl", "p2", "p5", "p8", "p10", "p12"]
            pairs = [(("RNA\nControl", "A>G"), ("RNA\nControl", "G>A")),
                     (("p2", "A>G"), ("p2", "G>A")),
                     (("p5", "A>G"), ("p5", "G>A")),
                     (("p8", "A>G"), ("p8", "G>A")),
                     (("p10", "A>G"), ("p10", "G>A")),
                     (("p12", "A>G"), ("p12", "G>A")),
                     (("RNA\nControl", "A>G"), ("RNA\nControl", "U>C")),
                     (("p2", "A>G"), ("p2", "U>C")),
                     (("p5", "A>G"), ("p5", "U>C")),
                     (("p8", "A>G"), ("p8", "U>C")),
                     (("p10", "A>G"), ("p10", "U>C")),
                     (("p12", "A>G"), ("p12", "U>C")),
                     (("RNA\nControl", "A>G"), ("RNA\nControl", "C>U")),
                     (("p2", "A>G"), ("p2", "C>U")),
                     (("p5", "A>G"), ("p5", "C>U")),
                     (("p8", "A>G"), ("p8", "C>U")),
                     (("p10", "A>G"), ("p10", "C>U")),
                     (("p12", "A>G"), ("p12", "C>U"))]
            pairs_adar = [(("RNA\nControl", "High\nADAR-like\nA>G"),
                           ("RNA\nControl", "Low\nADAR-like\nA>G")),
                          (("p2", "High\nADAR-like\nA>G"),
                           ("p2", "Low\nADAR-like\nA>G")),
                          (("p5", "High\nADAR-like\nA>G"),
                           ("p5", "Low\nADAR-like\nA>G")),
                          (("p8", "High\nADAR-like\nA>G"),
                           ("p8", "Low\nADAR-like\nA>G")),
                          (("p10", "High\nADAR-like\nA>G"),
                           ("p10", "Low\nADAR-like\nA>G")),
                          (("p12", "High\nADAR-like\nA>G"),
                           ("p12", "Low\nADAR-like\nA>G")),
                          (("RNA\nControl", "High\nADAR-like\nU>C"),
                           ("RNA\nControl", "Low\nADAR-like\nU>C")),
                          (("p2", "High\nADAR-like\nU>C"),
                           ("p2", "Low\nADAR-like\nU>C")),
                          (("p5", "High\nADAR-like\nU>C"),
                           ("p5", "Low\nADAR-like\nU>C")),
                          (("p8", "High\nADAR-like\nU>C"),
                           ("p8", "Low\nADAR-like\nU>C")),
                          (("p10", "High\nADAR-like\nU>C"),
                           ("p10", "Low\nADAR-like\nU>C")),
                          (("p12", "High\nADAR-like\nU>C"),
                           ("p12", "Low\nADAR-like\nU>C"))]

        passage_g = sns.catplot(x="passage",
                                y="frac_and_weight",
                                data=data_filter_replica,
                                hue="Mutation",
                                order=passage_order,
                                palette=mutation_palette(4),
                                kind="point",
                                dodge=0.5,
                                hue_order=transition_order,
                                join=False,
                                estimator=weighted_varaint,
                                orient="v",
                                legend=True)
        passage_g.set_axis_labels(
            "Passage", "Variant Frequency {} CI=95%".format(plus_minus))
        passage_g.set(yscale='log', ylim=(10**-6, 10**-2))
        plt.savefig(
            output_dir +
            "/Transition_Mutations_point_plot_RVB14_replica%s" % str(replica),
            dpi=300)
        plt.close()

        passage_g1 = sns.boxplot(x="passage",
                                 y="Frequency",
                                 data=data_filter_replica,
                                 hue="Mutation",
                                 order=passage_order,
                                 palette=mutation_palette(4),
                                 dodge=True,
                                 hue_order=transition_order)
        passage_g1.set_yscale('log')
        passage_g1.set_ylim(10**-6, 10**-2)
        passage_g1.set(xlabel="Passage", ylabel="Variant Frequency")
        annot = Annotator(passage_g1,
                          pairs,
                          x="passage",
                          y="Frequency",
                          hue="Mutation",
                          data=data_filter_replica,
                          order=passage_order,
                          hue_order=transition_order)
        annot.configure(test='t-test_welch',
                        text_format='star',
                        loc='outside',
                        verbose=2,
                        comparisons_correction="Bonferroni")
        annot.apply_test()
        file_path = output_dir + "/sts{0}.csv".format(replica)
        with open(file_path, "w") as o:
            with contextlib.redirect_stdout(o):
                passage_g1, test_results = annot.annotate()
        plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
        plt.tight_layout()
        plt.savefig(
            output_dir +
            "/Transition_Mutations_box_stat_plot_RVB14_replica{0}".format(
                replica),
            dpi=300)
        plt.close()
        # data_filter["passage"] = data_filter["passage"].astype(int)
        #
        #
        # g4 = sns.relplot("passage", "frac_and_weight", data=data_filter, hue="Mutation", palette=mutation_palette(4),
        #                  hue_order=transition_order, estimator=weighted_varaint, col="Type", kind="line",
        #                  col_order=type_order)
        #
        # g4.axes.flat[0].set_yscale('symlog', linthreshy=10 ** -5)
        # g4.set_axis_labels("Passage", "Variant Frequency")
        # # plt.show()
        # g4.savefig(output_dir + "/Time_Transition_Mutations_line_plot", dpi=300)
        # plt.close()
        """ADAR preferences"""
        data_filter_replica_synonymous = data_filter_replica.loc[
            data_filter_replica.Type == "Synonymous"]
        # data_filter_synonymous["ADAR_like"] = (data_filter_synonymous.Prev.str.contains('UpA') | data_filter_synonymous.Prev.str.contains('ApA'))
        data_filter_replica_synonymous["Mutation"] = np.where(
            ((data_filter_replica_synonymous["Mutation"] == "A>G") &
             (data_filter_replica_synonymous["5`_ADAR_Preference"] == "High")),
            "High\nADAR-like\nA>G",
            np.where(
                ((data_filter_replica_synonymous["Mutation"] == "A>G")
                 & (data_filter_replica_synonymous["5`_ADAR_Preference"]
                    == "Intermediate")), "Intermediate\nADAR-like\nA>G",
                np.where(
                    ((data_filter_replica_synonymous["Mutation"] == "A>G") &
                     (data_filter_replica_synonymous["5`_ADAR_Preference"]
                      == "Low")), "Low\nADAR-like\nA>G",
                    data_filter_replica_synonymous["Mutation"])))
        data_filter_replica_synonymous["Mutation_adar"] = np.where(
            ((data_filter_replica_synonymous["Mutation"] == "U>C") &
             (data_filter_replica_synonymous["3`_ADAR_Preference"] == "High")),
            "High\nADAR-like\nU>C",
            np.where(
                ((data_filter_replica_synonymous["Mutation"] == "U>C")
                 & (data_filter_replica_synonymous["3`_ADAR_Preference"]
                    == "Intermediate")), "Intermediate\nADAR-like\nU>C",
                np.where(
                    ((data_filter_replica_synonymous["Mutation"] == "U>C") &
                     (data_filter_replica_synonymous["3`_ADAR_Preference"]
                      == "Low")), "Low\nADAR-like\nU>C",
                    data_filter_replica_synonymous["Mutation"])))
        mutation_adar_order = [
            "High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
            "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"
        ]
        # data_filter_replica_synonymous["passage"] = data_filter_replica_synonymous["passage"].astype(str)
        # data_filter_replica_synonymous["passage"] = "p" + data_filter_replica_synonymous["passage"]
        catplot_adar = sns.catplot(x="passage",
                                   y="frac_and_weight",
                                   data=data_filter_replica_synonymous,
                                   hue="Mutation_adar",
                                   order=passage_order,
                                   palette=mutation_palette(4, adar=True),
                                   kind="point",
                                   dodge=0.5,
                                   hue_order=mutation_adar_order,
                                   join=False,
                                   estimator=weighted_varaint,
                                   orient="v",
                                   legend=True)
        catplot_adar.set_axis_labels(
            "Passage", "Variant Frequency {} CI=95%".format(plus_minus))
        catplot_adar.set(yscale='log')
        catplot_adar.set(ylim=(10**-6, 10**-2))
        # catplot_adar.set_xticklabels(fontsize=8)
        # plt.tight_layout()
        plt.savefig(
            output_dir +
            "/adar_pref_mutation_point_plot_RVB14_replica{0}.png".format(
                replica),
            dpi=300)
        plt.close()

        adar_g = sns.boxplot(x="passage",
                             y="Frequency",
                             data=data_filter_replica_synonymous,
                             hue="Mutation_adar",
                             order=passage_order,
                             palette=mutation_palette(4, adar=True),
                             dodge=True,
                             hue_order=mutation_adar_order)
        adar_g.set_yscale('log')
        adar_g.set_ylim(10**-6, 10**-1)
        adar_g.set(xlabel="Passage", ylabel="Variant Frequency")

        annot = Annotator(adar_g,
                          pairs_adar,
                          x="passage",
                          y="Frequency",
                          hue="Mutation_adar",
                          data=data_filter_replica_synonymous,
                          hue_order=mutation_adar_order)
        annot.configure(test='t-test_welch',
                        text_format='star',
                        loc='outside',
                        verbose=2,
                        comparisons_correction="Bonferroni")
        annot.apply_test()
        file_path = output_dir + "/sts_adar_{0}.csv".format(replica)
        with open(file_path, "w") as o:
            with contextlib.redirect_stdout(o):
                adar_g, test_results = annot.annotate()
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.tight_layout()
        plt.savefig(
            output_dir +
            "/adar_pref_mutation_box_stat_plot_RVB14_replica{0}".format(
                replica),
            dpi=300)
        plt.close()