Пример #1
0
def main():
    """Plot per-virus transition rates by mutation type with significance marks.

    Reads the pre-computed ``all_data.csv`` table, draws a boxen plot of
    "Transition rate" (log-scaled Y axis) for each mutation type, split by
    virus, annotates it with Bonferroni-corrected Mann-Whitney comparisons
    and saves the figure as ``<date>_mutation_rate.png`` inside a dated
    ``<date>_fits_syn_plots`` directory.
    """
    date = datetime.date.today().strftime("%Y%m%d")
    # Disabled preprocessing that wrote the ``all_data.csv`` file read below;
    # kept for provenance.
    # passages = "p2-p12"
    # opv_passages = "p1-p7"
    # pv_passages = "p3-p8"
    # input_dir = "/Users/odedkushnir/Projects/fitness"
    # rv_replica1_mutation_data = post_data_mutation("/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/"
    #                                                "20201008RV-202329127/merged/passages/fits_all_pos_at_once_sampling/"
    #                                                "replica1_syn/output/mutation/%s" % passages)
    # rv_replica1_mutation_data["Virus"] = "RVB14 #1"
    # rv_replica2_mutation_data = post_data_mutation("/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/"
    #                                                "20201008RV-202329127/merged/passages/fits_all_pos_at_once_sampling/"
    #                                                "replica2_syn/output/mutation/%s" % passages)
    # rv_replica2_mutation_data["Virus"] = "RVB14 #2"
    # rv_replica3_mutation_data = post_data_mutation("/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/"
    #                                                "20201008RV-202329127/merged/passages/fits_all_pos_at_once_sampling/"
    #                                                "replica3_syn/output/mutation/%s" % passages)
    # rv_replica3_mutation_data["Virus"] = "RVB14 #3"
    # cv_mutation_data = post_data_mutation("/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/190627_RV_CV"
    #                                                   "/merged/CVB3/Rank0_data_mutation/fits/output/mutation/%s" % passages)
    # cv_mutation_data["Virus"] = "CVB3"
    # opv_mutataion_data = post_data_mutation("/Volumes/STERNADILABHOME$/volume3/okushnir/CirSeq/OPV/fits/output/mutation/"
    #                                         "all_positions_p1-p7")
    # opv_mutataion_data["Virus"] = "OPV2"
    #
    # pv_mutataion_data = post_data_mutation("/Volumes/STERNADILABHOME$/volume3/okushnir/CirSeq/Mahoney/fits/output/"
    #                                        "mutation/p3-p8")
    # pv_mutataion_data["Virus"] = "PV1"
    #
    #
    #
    # output_dir = "/Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/20201008RV-202329127/merged/passages/" \
    #                          "%s_fits_syn_plots" % date
    # try:
    #     os.mkdir(output_dir)
    # except OSError:
    #     print("Creation of the directory %s failed" % output_dir)
    # else:
    #     print("Successfully created the directory %s " % output_dir)
    #
    # all_data = pd.concat([rv_replica1_mutation_data, rv_replica2_mutation_data, rv_replica3_mutation_data,
    #                       cv_mutation_data, opv_mutataion_data, pv_mutataion_data], sort=False)
    # all_data = all_data.rename(columns={"allele0_1": "Transition rate"})
    # all_data["Transition rate"] = all_data["Transition rate"].astype(float)
    # # print(all_data.to_string())
    # # all_data = all_data.rename(columns={"inferred_mu": "Mutation rate"})
    # # # print(all_data["Mutation rate"].dtype)
    # # all_data["Mutation rate"] = all_data["Mutation rate"].map(lambda x: str(x).lstrip('*'))
    # # all_data["Mutation rate"] = pd.to_numeric(all_data["Mutation rate"], errors='coerce')#.astype(float)
    # # # print(all_data["Mutation rate"].dtype)
    # # all_data["Mutation"] = all_data["Mutation"].apply(lambda x: x[0]+">"+x[1:]if len(x)<=2 else x)# if len(x)==2 else x[0]+">"+x[1:])
    # # all_data["Mutation"] = all_data["Mutation"].apply(lambda x: x.split("_")[0] + "\n" + x.split("_")[-1] + "-like" if len(x)>3 else x)
    # all_data["Mutation"] = np.where(all_data["Mutation"] == "nonadar", "A>G\nNon-ADAR-like", all_data["Mutation"])
    # all_data["Mutation"] = np.where(all_data["Mutation"] == "adar", "A>G\nADAR-like", all_data["Mutation"])
    # all_data["Mutation"] = np.where(all_data["Mutation"] == "AG", "A>G", all_data["Mutation"])
    # all_data["Mutation"] = np.where(all_data["Mutation"] == "UC", "U>C", all_data["Mutation"])
    # all_data["Mutation"] = np.where(all_data["Mutation"] == "GA", "G>A", all_data["Mutation"])
    # all_data["Mutation"] = np.where(all_data["Mutation"] == "CU", "C>U", all_data["Mutation"])
    # # all_data = all_data[(all_data["pos"] >= 5785) & (all_data["pos"] <= 7212)]
    #
    #
    #
    # # q1 = all_data["Transition rate"].quantile(0.25)
    # # q3 = all_data["Transition rate"].quantile(0.75)
    # # all_data = all_data[all_data["Transition rate"] > q1]
    # # all_data = all_data[all_data["Transition rate"] < q3]
    #
    # all_data = all_data[all_data["Mutation"] != "A>G\nADAR-like"]
    # all_data = all_data[all_data["Mutation"] != "A>G\nNon-ADAR-like"]
    # print(all_data.shape[0])
    # all_data.to_csv("/Users/odedkushnir/PhD_Projects/fitness/all_data.csv")

    # Plots - local
    all_data = pd.read_csv(
        "/Users/odedkushnir/PhD_Projects/fitness/all_data.csv")
    output_dir = "/Users/odedkushnir/PhD_Projects/fitness/{0}_fits_syn_plots".format(
        date)
    try:
        os.mkdir(output_dir)
    except FileExistsError:
        # The directory name only depends on the date, so a second run on the
        # same day lands here; that is not a failure and the path is reused.
        print("The directory %s already exists" % output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    plt.style.use('classic')

    sns.set_palette("Set2")
    mutation_order = ["C>U", "G>A", "U>C", "A>G"]
    virus_order = ["RVB14 #1", "RVB14 #2", "RVB14 #3", "CVB3", "OPV2", "PV1"]
    g1 = sns.boxenplot(x="Mutation",
                       y="Transition rate",
                       data=all_data,
                       order=mutation_order,
                       hue="Virus",
                       hue_order=virus_order)
    g1.set_yscale("log")

    # Pair format: [((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]
    # Within every virus, A>G and U>C are each compared with C>U and G>A.
    pairs = [((first_mutation, virus), (second_mutation, virus))
             for first_mutation in ("A>G", "U>C")
             for virus in virus_order
             for second_mutation in ("C>U", "G>A")]
    annotator = Annotator(g1,
                          pairs,
                          x="Mutation",
                          y="Transition rate",
                          data=all_data,
                          order=mutation_order,
                          hue="Virus",
                          hue_order=virus_order)
    annotator.configure(test='Mann-Whitney',
                        text_format='star',
                        loc='outside',
                        comparisons_correction="Bonferroni")
    annotator.apply_and_annotate()
    g1.set(xlabel="Type of mutation")
    g1.set(ylabel="Mutation rate inferred")
    g1.set_ylim(10**-10, 10**-1)
    # Legend is placed outside the axes, to the right of the plot.
    g1.legend(loc='center left',
              bbox_to_anchor=(1.05, 0.5),
              borderaxespad=0.,
              fontsize=7)
    plt.savefig(os.path.join(output_dir, "%s_mutation_rate.png" % date),
                dpi=600,
                bbox_inches='tight')
    plt.close()
Пример #2
0
def context_boxplot(context_loadings,
                    metadict,
                    included_factors=None,
                    group_order=None,
                    statistical_test='Mann-Whitney',
                    pval_correction='benjamini-hochberg',
                    text_format='star',
                    nrows=1,
                    figsize=(12, 6),
                    cmap='tab10',
                    title_size=14,
                    axis_label_size=12,
                    group_label_rotation=45,
                    ylabel='Context Loadings',
                    dot_color='lightsalmon',
                    dot_edge_color='brown',
                    filename=None,
                    verbose=False):
    '''
    Plots a boxplot to compare the loadings of context groups in each
    of the factors resulting from a tensor decomposition.

    Parameters
    ----------
    context_loadings : pandas.DataFrame
        Dataframe containing the loadings of each of the contexts
        from a tensor decomposition. Rows are contexts and columns
        are the factors obtained.

    metadict : dict
        A dictionary containing the groups where each of the contexts
        belong to. Keys corresponds to the indexes in `context_loadings`
        and values are the respective groups. For example:
        metadict={'Context 1' : 'Group 1', 'Context 2' : 'Group 1',
                  'Context 3' : 'Group 2', 'Context 4' : 'Group 2'}

    included_factors : list, default=None
        Factors to be included. Factor names must be the same as column elements
        in the context_loadings.

    group_order : list, default=None
        Order of the groups to plot the boxplots. Considering the
        example of the metadict, it could be:
        group_order=['Group 1', 'Group 2'] or
        group_order=['Group 2', 'Group 1']
        If None, the order that groups are found in `metadict`
        will be considered.

    statistical_test : str, default='Mann-Whitney'
        The statistical test to compare context groups within each factor.
        If None, no statistical annotation is added.
        Options include:
        't-test_ind', 't-test_welch', 't-test_paired', 'Mann-Whitney',
        'Mann-Whitney-gt', 'Mann-Whitney-ls', 'Levene', 'Wilcoxon', 'Kruskal'.

    pval_correction : str, default='benjamini-hochberg'
        Multiple test correction method to reduce false positives.
        Options include:
        'bonferroni', 'bonf', 'Bonferroni', 'holm-bonferroni', 'HB',
        'Holm-Bonferroni', 'holm', 'benjamini-hochberg', 'BH', 'fdr_bh',
        'Benjamini-Hochberg', 'fdr_by', 'Benjamini-Yekutieli', 'BY', None

    text_format : str, default='star'
        Format to display the results of the statistical test.
        Options are:

        - 'star', to display P- values < 1e-4 as "****"; < 1e-3 as "***";
                  < 1e-2 as "**"; < 0.05 as "*", and < 1 as "ns".
        - 'simple', to display P-values < 1e-5 as "1e-5"; < 1e-4 as "1e-4";
                  < 1e-3 as "0.001"; < 1e-2 as "0.01"; and < 5e-2 as "0.05".

    nrows : int, default=1
        Number of rows to generate the subplots.

    figsize : tuple, default=(12, 6)
        Size of the figure (width*height), each in inches.

    cmap : str, default='tab10'
        Name of the color palette for coloring the major groups of contexts.

    title_size : int, default=14
        Font size of the title in each of the factor boxplots.

    axis_label_size : int, default=12
        Font size of the labels for X and Y axes.

    group_label_rotation : int, default=45
        Angle of rotation for the tick labels in the X axis.

    ylabel : str, default='Context Loadings'
        Label for the Y axis.

    dot_color : str, default='lightsalmon'
        A matplotlib color for the dots representing individual contexts
        in the boxplot. For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    dot_edge_color : str, default='brown'
        A matplotlib color for the edge of the dots in the boxplot.
        For more info see:
        https://matplotlib.org/stable/gallery/color/named_colors.html

    filename : str, default=None
        Path to save the figure. If None, the figure is not saved.

    verbose : boolean, default=False
        Whether printing out the result of the pairwise statistical tests
        in each of the factors

    Returns
    -------
    fig : matplotlib.figure.Figure
        A matplotlib figure.

    axes : matplotlib.axes.Axes or array of Axes
           Matplotlib axes representing the subplots containing the boxplots.
           A single Axes is returned when only one factor is plotted.

    Raises
    ------
    AssertionError
        If `group_order` is given but misses a group present in `metadict`.
    '''
    # Function-scope import so the block does not rely on a module-level one.
    from itertools import combinations

    # dict.fromkeys removes duplicates while keeping first-appearance order,
    # which is the default order promised for `group_order`.
    metadict_groups = list(dict.fromkeys(metadict.values()))
    if group_order is not None:
        assert set(metadict_groups) <= set(group_order), \
            "All groups in `metadict` must be contained in `group_order`"
    else:
        group_order = metadict_groups
    df = context_loadings.copy()

    if included_factors is None:
        factor_labels = list(df.columns)
    else:
        factor_labels = included_factors
    rank = len(factor_labels)
    df['Group'] = [metadict[idx] for idx in df.index]

    nrows = min([rank, nrows])
    ncols = int(np.ceil(rank / nrows))
    fig, axes = plt.subplots(nrows=nrows,
                             ncols=ncols,
                             figsize=figsize,
                             sharey='none')

    # With a single subplot, plt.subplots returns a bare Axes, not an array.
    if rank == 1:
        axs = np.array([axes])
    else:
        axs = axes.flatten()

    for i, factor in enumerate(factor_labels):
        ax = axs[i]
        x, y = 'Group', factor

        # Plot the boxes
        ax = sns.boxplot(x=x,
                         y=y,
                         data=df,
                         order=group_order,
                         whis=[0, 100],
                         width=.6,
                         palette=cmap,
                         boxprops=dict(alpha=.5),
                         ax=ax)

        # Plot the dots
        sns.stripplot(x=x,
                      y=y,
                      data=df,
                      size=6,
                      order=group_order,
                      color=dot_color,
                      edgecolor=dot_edge_color,
                      linewidth=0.6,
                      jitter=False,
                      ax=ax)

        if statistical_test is not None:
            # Add annotations about statistical test (all pairwise comparisons)
            pairs = list(combinations(group_order, 2))
            annotator = Annotator(ax=ax,
                                  pairs=pairs,
                                  data=df,
                                  x=x,
                                  y=y,
                                  order=group_order)
            annotator.configure(test=statistical_test,
                                text_format=text_format,
                                loc='inside',
                                comparisons_correction=pval_correction,
                                verbose=verbose)
            annotator.apply_and_annotate()

        ax.set_title(factor, fontsize=title_size)

        ax.set_xlabel('', fontsize=axis_label_size)
        # Only the first subplot of each row carries the Y label.
        if i % ncols == 0:
            ax.set_ylabel(ylabel, fontsize=axis_label_size)
        else:
            ax.set_ylabel(' ', fontsize=axis_label_size)

        ax.set_xticklabels(ax.get_xticklabels(),
                           rotation=group_label_rotation,
                           rotation_mode='anchor',
                           va='bottom',
                           ha='right')

    # Remove extra subplots (grid cells beyond the number of factors)
    for unused_ax in axs[rank:]:
        unused_ax.axis(False)

    # A bare Axes (single factor) has no `shape`; skip the alignment then.
    if isinstance(axes, np.ndarray) and axes.shape[0] > 1:
        axes = axes.reshape(axes.shape[0], -1)
        fig.align_ylabels(axes[:, 0])

    plt.tight_layout(rect=[0, 0.03, 1, 0.99])
    if filename is not None:
        plt.savefig(filename, dpi=300, bbox_inches='tight')
    return fig, axes
Пример #3
0
 def test_comparisons_correction_by_name(self):
     # A correction given by its short name ("BH") must be accepted by
     # configure() and the full test + annotation run must complete.
     boxplot_ax = sns.boxplot(ax=self.ax, data=self.data2)
     self.ax = boxplot_ax
     annotator = Annotator(boxplot_ax, pairs=[("X", "Y")], data=self.data2)
     annotator.configure(test="Mann-Whitney", comparisons_correction="BH")
     annotator.apply_and_annotate()
Пример #4
0
class Test(unittest.TestCase):
    """Check that annotation texts follow the p-values and chosen format."""

    # Legend printed for the "star" text format (also the default format).
    _STAR_LEGEND = ("p-value annotation legend:\n"
                    "      ns: p <= 1.00e+00\n"
                    "       *: 1.00e-02 < p <= 5.00e-02\n"
                    "      **: 1.00e-03 < p <= 1.00e-02\n"
                    "     ***: 1.00e-04 < p <= 1.00e-03\n"
                    "    ****: p <= 1.00e-04\n\n")

    def setUp(self) -> None:
        # Two observations for each (x, color) combination, indexed 1..8.
        observations = [
            ("a", 15, "blue"), ("a", 16, "blue"),
            ("b", 17, "blue"), ("b", 18, "blue"),
            ("a", 15, "red"), ("a", 16, "red"),
            ("b", 17, "red"), ("b", 18, "red"),
        ]
        self.df = pd.DataFrame.from_dict({
            row_id: {"x": x_val, "y": y_val, "color": color}
            for row_id, (x_val, y_val, color) in enumerate(observations,
                                                           start=1)
        }).T

        plot_kwargs = dict(data=self.df, x="x", y="y", hue="color")
        self.ax = sns.boxplot(**plot_kwargs)

        compared_pairs = [
            (("a", "blue"), ("a", "red")),
            (("b", "blue"), ("b", "red")),
            (("a", "blue"), ("b", "blue")),
        ]
        self.annotator = Annotator(self.ax,
                                   pairs=compared_pairs,
                                   verbose=False,
                                   **plot_kwargs)
        self.pvalues = [0.03, 0.04, 0.9]

    def _annotation_texts(self):
        # Texts produced when the fixture p-values are supplied directly.
        results = self.annotator._get_results("auto", pvalues=self.pvalues)
        return [result.text for result in results]

    def test_format_simple(self):
        # "simple" format requested through the pvalue_format dictionary.
        self.annotator.configure(pvalue_format={"text_format": "simple"})
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.90"],
                         self._annotation_texts())

    def test_format_simple_in_annotator(self):
        # Same format, requested through the annotator-level keyword.
        self.annotator.configure(text_format="simple")
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.90"],
                         self._annotation_texts())

    def test_wrong_parameter(self):
        expected_message = re.escape('parameter(s) "that"')
        with self.assertRaisesRegex(InvalidParametersError, expected_message):
            self.annotator.configure(pvalue_format={"that": "whatever"})

    def test_format_string(self):
        self.annotator.configure(text_format="simple",
                                 pvalue_format_string="{:.3f}")
        configured = self.annotator.pvalue_format.pvalue_format_string
        self.assertEqual("{:.3f}", configured)
        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.900"],
                         self._annotation_texts())

    def test_format_string_default(self):
        # Setting the format string back to DEFAULT restores two decimals.
        self.annotator.configure(text_format="simple",
                                 pvalue_format_string="{:.3f}")
        self.annotator.pvalue_format.config(pvalue_format_string=DEFAULT)

        self.assertEqual(["p ≤ 0.05", "p ≤ 0.05", "p = 0.90"],
                         self._annotation_texts())

    @unittest.mock.patch('sys.stdout', new_callable=io.StringIO)
    def assert_print_pvalue(self, pvalue_format, expected_output, mock_stdout):
        # `mock_stdout` is injected by the patch decorator and captures
        # whatever the legend printing writes to stdout.
        pvalue_format.print_legend_if_used()
        printed = mock_stdout.getvalue()
        self.assertEqual(expected_output, printed)

    def test_print_pvalue_default(self):
        self.assert_print_pvalue(PValueFormat(), self._STAR_LEGEND)

    def test_print_pvalue_star(self):
        star_format = PValueFormat()
        star_format.config(text_format="star")
        self.assert_print_pvalue(star_format, self._STAR_LEGEND)

    def test_print_pvalue_other(self):
        # No legend is expected for the "simple" format.
        simple_format = PValueFormat()
        simple_format.config(text_format="simple")
        self.assert_print_pvalue(simple_format, "")

    def test_get_configuration(self):
        expected_configuration = {
            'correction_format': '{star} ({suffix})',
            'fontsize': 'medium',
            'pvalue_format_string': '{:.3e}',
            'show_test_name': True,
            'simple_format_string': '{:.2f}',
            'text_format': 'star',
            'pvalue_thresholds': [[1e-4, "****"], [1e-3, "***"],
                                  [1e-2, "**"], [0.05, "*"], [1, "ns"]],
        }
        self.assertDictEqual(PValueFormat().get_configuration(),
                             expected_configuration)

    def test_config_pvalue_thresholds(self):
        custom_thresholds = [[0.001, "<= 0.001"], [0.05, "<= 0.05"], [1, 'ns']]
        custom_format = PValueFormat()
        custom_format.config(pvalue_thresholds=custom_thresholds)
        expected_legend = ("p-value annotation legend:\n"
                           "      ns: p <= 1.00e+00\n"
                           " <= 0.05: 1.00e-03 < p <= 5.00e-02\n"
                           "<= 0.001: p <= 1.00e-03\n\n")
        self.assert_print_pvalue(custom_format, expected_legend)
Пример #5
0
class TestAnnotator(unittest.TestCase):
    """Validate Annotator parameter checks and its basic workflows."""

    def setUp(self):
        # Fixtures: a list of lists, a wide data frame and a long data frame
        # with a hue column; the default axes holds a boxplot of the lists.
        self.data = [[1, 2, 3], [2, 5, 7]]
        self.data2 = pd.DataFrame([[1, 2], [2, 5], [3, 7]], columns=["X", "Y"])
        self.ax = sns.boxplot(data=self.data)

        observations = [
            ("a", 15, "blue"), ("a", 16, "blue"),
            ("b", 17, "blue"), ("b", 18, "blue"),
            ("a", 15, "red"), ("a", 16, "red"),
            ("b", 17, "red"), ("b", 18, "red"),
        ]
        self.df = pd.DataFrame.from_dict({
            row_id: {'x': x_val, 'y': y_val, 'color': color}
            for row_id, (x_val, y_val, color) in enumerate(observations,
                                                           start=1)
        }).T
        self.df.y = self.df.y.astype(float)

        self.pairs_for_df = [
            (("a", "blue"), ("b", "blue")),
            (("a", "blue"), ("a", "red")),
        ]
        self.params_df = dict(data=self.df,
                              x="x",
                              y="y",
                              hue="color",
                              order=["a", "b"],
                              hue_order=['red', 'blue'])

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    def _hue_annotator(self, pairs, hue_order):
        # Annotator over the long data frame, drawn on the axes from setUp.
        return Annotator(self.ax, pairs,
                         data=self.df, x="x", y="y",
                         order=["a", "b"], hue='color',
                         hue_order=hue_order)

    def _wide_frame_annotator(self):
        # Redraw the boxplot from the wide frame and annotate its two columns.
        self.ax = sns.boxplot(ax=self.ax, data=self.data2)
        return Annotator(self.ax, pairs=[("X", "Y")], data=self.data2)

    def _run_inverted_then_replot(self):
        # One complete run on the inverted pairs, then a fresh plot is
        # registered on the same annotator through new_plot().
        self.test_init_df_inverted()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_and_annotate()

        self.ax = sns.boxplot(**self.params_df)
        self.annot.new_plot(self.ax, self.pairs_for_df, **self.params_df)

    @staticmethod
    def _scipy_older_than_1_7():
        # The expected Mann-Whitney p-values differ on either side of 1.7.
        return version.parse(scipy.__version__) < version.parse("1.7")

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    def test_init_simple(self):
        self.annot = Annotator(self.ax, [(0, 1)], data=self.data)

    def test_init_df(self):
        self.ax = sns.boxplot(**self.params_df)
        self.annot = Annotator(self.ax,
                               pairs=self.pairs_for_df,
                               **self.params_df)

    def test_init_barplot(self):
        bar_ax = sns.barplot(data=self.data)
        self.annot = Annotator(bar_ax, [(0, 1)], plot="barplot",
                               data=self.data)

    def test_test_name_provided(self):
        # Applying a test before one was configured is an error.
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "test"):
            self.annot.apply_test()

    # ------------------------------------------------------------------
    # Validation of pairs / order / hue_order
    # ------------------------------------------------------------------
    def test_unmatched_x_in_box_pairs_without_hue(self):
        with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
            self.annot = Annotator(self.ax, [(0, 2)], data=self.data)

    def test_order_in_x(self):
        with self.assertRaisesRegex(ValueError, "(specified in `order`)"):
            self.annot = Annotator(self.ax, [(0, 2)],
                                   data=self.data, order=[0, 1, 2])

    def test_working_hue_orders(self):
        valid_pairs = [(("a", "blue"), ("b", "blue"))]
        self.annot = self._hue_annotator(valid_pairs, ['red', 'blue'])

    def test_unmatched_hue_in_hue_order(self):
        valid_pairs = [(("a", "blue"), ("b", "blue"))]
        with self.assertRaisesRegex(ValueError, "(specified in `hue_order`)"):
            self.annot = self._hue_annotator(valid_pairs, ['red', 'yellow'])

    def test_unmatched_hue_in_box_pairs(self):
        unknown_hue_pairs = [(("a", "yellow"), ("b", "blue"))]
        with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
            self.annot = self._hue_annotator(unknown_hue_pairs,
                                             ['red', 'blue'])

    def test_unmatched_x_in_box_pairs_with_hue(self):
        unknown_x_pairs = [(("c", "blue"), ("b", "blue"))]
        with self.assertRaisesRegex(ValueError, "(specified in `pairs`)"):
            self.annot = self._hue_annotator(unknown_x_pairs, ['red', 'blue'])

    # ------------------------------------------------------------------
    # Configuration
    # ------------------------------------------------------------------
    def test_location(self):
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "argument `loc`"):
            self.annot.configure(loc="somewhere")

    def test_unknown_parameter(self):
        self.test_init_simple()
        expected_message = re.escape('parameter(s) "that"')
        with self.assertRaisesRegex(InvalidParametersError, expected_message):
            self.annot.configure(that="this")

    def test_format(self):
        self.test_init_simple()

        with self.assertRaisesRegex(ValueError, "argument `text_format`"):
            self.annot.configure(pvalue_format={'text_format': 'that'})

    def test_apply_comparisons_correction(self):
        self.test_init_simple()
        corrected = self.annot._apply_comparisons_correction([])
        self.assertIsNone(corrected)

    def test_correct_num_custom_annotations(self):
        # Two custom texts for a single pair must be rejected.
        self.test_init_simple()
        with self.assertRaisesRegex(ValueError, "same length"):
            self.annot.set_custom_annotations(["One", "Two"])

    def test_not_implemented_plot(self):
        with self.assertRaises(NotImplementedError):
            Annotator(self.ax, [(0, 1)], data=self.data, plot="thatplot")

    def test_reconfigure_alpha(self):
        self.test_init_simple()
        with self.assertWarnsRegex(UserWarning, "pvalue_thresholds"):
            self.annot.configure(alpha=0.1)
        self.annot.reset_configuration()
        self.assertEqual(0.05, self.annot.alpha)

    def test_reconfigure_alpha_with_thresholds(self):
        self.test_init_simple()
        threshold_config = {"pvalue_thresholds": DEFAULT}
        self.annot.configure(alpha=0.1, pvalue_format=threshold_config)
        self.annot.reset_configuration()
        self.assertEqual(0.05, self.annot.alpha)

    # ------------------------------------------------------------------
    # Annotation texts
    # ------------------------------------------------------------------
    def test_get_annotation_text_undefined(self):
        self.test_init_simple()
        texts = self.annot.get_annotations_text()
        self.assertIsNone(texts)

    def test_get_annotation_text_calculated(self):
        self.test_init_simple()
        self.annot.configure(test="Mann-Whitney", verbose=2)
        self.annot.apply_test()
        self.assertEqual(["ns"], self.annot.get_annotations_text())

    def test_get_annotation_text_in_input_order(self):
        self.test_init_df()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_test()

        if self._scipy_older_than_1_7():
            expected = ['M.W.W. p = 0.25', 'M.W.W. p = 0.67']
        else:
            expected = ['M.W.W. p = 0.33', 'M.W.W. p = 1.00']
        self.assertEqual(expected, self.annot.get_annotations_text())

    def test_init_df_inverted(self):
        reversed_pairs = self.pairs_for_df[::-1]
        self.ax = sns.boxplot(**self.params_df)
        self.annot = Annotator(self.ax, pairs=reversed_pairs,
                               **self.params_df)

    def test_get_annotation_text_in_input_order_inverted(self):
        self.test_init_df_inverted()
        self.annot.configure(test="Mann-Whitney", text_format="simple")
        self.annot.apply_test()

        if self._scipy_older_than_1_7():
            expected = ['M.W.W. p = 0.67', 'M.W.W. p = 0.25']
        else:
            expected = ['M.W.W. p = 1.00', 'M.W.W. p = 0.33']
        self.assertEqual(expected, self.annot.get_annotations_text())

    # ------------------------------------------------------------------
    # Re-using an annotator on a new plot
    # ------------------------------------------------------------------
    def test_apply_no_apply_warns(self):
        # annotate() without re-applying the newly configured test warns.
        self._run_inverted_then_replot()
        self.annot.configure(test="Levene", text_format="simple")
        with self.assertWarns(UserWarning):
            self.annot.annotate()

    def test_apply_apply_no_warns(self):
        self._run_inverted_then_replot()
        self.annot.configure(test="Mann-Whitney-gt", text_format="simple")
        self.annot.apply_and_annotate()

    def test_valid_parameters_df_data_only(self):
        annotator = self._wide_frame_annotator()
        configured = annotator.configure(test="Mann-Whitney")
        configured.apply_and_annotate()

    def test_comparisons_correction_by_name(self):
        annotator = self._wide_frame_annotator()
        annotator.configure(test="Mann-Whitney", comparisons_correction="BH")
        annotator.apply_and_annotate()

    def test_empty_annotator_wo_new_plot_raises(self):
        empty_annotator = Annotator.get_empty_annotator()
        with self.assertRaises(RuntimeError):
            empty_annotator.configure(test="Mann-Whitney")

    def test_empty_annotator_then_new_plot_ok(self):
        empty_annotator = Annotator.get_empty_annotator()
        self.ax = sns.boxplot(ax=self.ax, data=self.data2)
        empty_annotator.new_plot(self.ax, pairs=[("X", "Y")],
                                 data=self.data2)
        empty_annotator.configure(test="Mann-Whitney")

    # ------------------------------------------------------------------
    # _ensure_ax_operation_format
    # ------------------------------------------------------------------
    def test_ensure_ax_operation_format_args_not_ok(self):
        malformed_operation = ["func", "param", None]
        with self.assertRaises(ValueError):
            _ensure_ax_operation_format(malformed_operation)

    def test_ensure_ax_operation_format_op_not_ok(self):
        malformed_operation = ["func", ["param"]]
        with self.assertRaises(ValueError):
            _ensure_ax_operation_format(malformed_operation)

    def test_ensure_ax_operation_format_kwargs_not_ok(self):
        malformed_operation = ["func", ["param"], {"that"}]
        with self.assertRaises(ValueError):
            _ensure_ax_operation_format(malformed_operation)

    def test_ensure_ax_operation_format_func_not_ok(self):
        malformed_operation = [sum, ["param"], {"that": "this"}]
        with self.assertRaises(ValueError):
            _ensure_ax_operation_format(malformed_operation)
Пример #6
0
 def test_valid_parameters_df_data_only(self):
     # A data frame passed as `data` alone (no x/y) must be enough to
     # configure a test and run the full test + annotation workflow.
     boxplot_ax = sns.boxplot(ax=self.ax, data=self.data2)
     self.ax = boxplot_ax
     annotator = Annotator(boxplot_ax, pairs=[("X", "Y")], data=self.data2)
     configured = annotator.configure(test="Mann-Whitney")
     configured.apply_and_annotate()
Пример #7
0
class Test(unittest.TestCase):
    """Check annotation texts against supplied p-values and text formats."""

    def setUp(self) -> None:
        # noinspection DuplicatedCode
        # Two observations for every (x, color) combination.
        observations = [
            ("a", 15, "blue"), ("a", 16, "blue"),
            ("b", 17, "blue"), ("b", 18, "blue"),
            ("a", 15, "red"), ("a", 16, "red"),
            ("b", 17, "red"), ("b", 18, "red"),
        ]
        records = {
            index: {'x': x, 'y': y, 'color': color}
            for index, (x, y, color) in enumerate(observations, start=1)
        }
        self.df = pd.DataFrame.from_dict(records).T

        plot_kwargs = dict(data=self.df, x="x", y="y", hue='color')
        self.ax = sns.boxplot(**plot_kwargs)

        group_pairs = [
            (("a", "blue"), ("a", "red")),
            (("b", "blue"), ("b", "red")),
            (("a", "blue"), ("b", "blue")),
        ]
        self.annotator = Annotator(self.ax, pairs=group_pairs, **plot_kwargs)
        # One p-value per pair, in the order of `group_pairs`.
        self.pvalues = [0.03, 0.04, 0.9]

    def _configured_texts(self, **configuration):
        """Configure the annotator, feed it the p-values, return the texts."""
        self.annotator.configure(**configuration)
        self.annotator.set_pvalues(self.pvalues)
        return self.annotator.get_annotations_text()

    def test_ns_without_correction_star(self):
        """Without correction the star texts follow the raw p-values."""
        results = self.annotator._get_results("auto", pvalues=self.pvalues)
        texts = [item.text for item in results]
        self.assertEqual(["*", "*", "ns"], texts)

    def test_signif_with_type1_correction_star(self):
        """BH correction appends "(ns)" to the two starred results."""
        texts = self._configured_texts(
            comparisons_correction=ComparisonsCorrection("BH"))
        self.assertEqual(["* (ns)", "* (ns)", "ns"], texts)

    def test_signif_with_type1_correction_star_replace(self):
        """With correction_format="replace" every text becomes "ns"."""
        texts = self._configured_texts(
            comparisons_correction=ComparisonsCorrection("BH"),
            correction_format="replace")
        self.assertEqual(["ns", "ns", "ns"], texts)

    def test_signif_with_type1_correction_star_incorrect_num_comparisons(self):
        """num_comparisons=0 raises a ValueError mentioning "positive"."""
        self.annotator.configure(
            comparisons_correction=ComparisonsCorrection("BH"))
        self.assertRaisesRegex(
            ValueError, "positive",
            self.annotator.set_pvalues, self.pvalues, num_comparisons=0)

    def test_signif_with_type1_correction_star_abnormal_num_comparisons(self):
        """num_comparisons=1 emits a "Manually-specified" UserWarning."""
        self.annotator.configure(
            comparisons_correction=ComparisonsCorrection("BH"))
        self.assertWarnsRegex(
            UserWarning, "Manually-specified",
            self.annotator.set_pvalues, self.pvalues, num_comparisons=1)

    def test_signif_with_type0_correction_star(self):
        """Bonferroni correction turns every star text into "ns"."""
        texts = self._configured_texts(
            comparisons_correction=ComparisonsCorrection("bonferroni"))
        self.assertEqual(["ns", "ns", "ns"], texts)

    def test_signif_with_type1_correction_simple(self):
        """BH correction combined with the 'simple' text format."""
        texts = self._configured_texts(
            comparisons_correction=ComparisonsCorrection("BH"),
            pvalue_format={'text_format': 'simple'})
        self.assertEqual(
            ["p ≤ 0.05 (ns)", "p ≤ 0.05 (ns)", "p = 0.90"], texts)

    def test_signif_with_type0_correction_simple(self):
        """Bonferroni correction combined with the 'simple' text format."""
        texts = self._configured_texts(
            comparisons_correction=ComparisonsCorrection("bonferroni"),
            pvalue_format={'text_format': 'simple'})
        self.assertEqual(["p = 0.09", "p = 0.12", "p = 1.00"], texts)

    def test_reapply_annotations(self):
        """Placeholder: no assertions are implemented yet."""
        pass
Пример #8
0
def _adar_preference_labels(frame, mutation, preference_column):
    """Label the `mutation` rows of `frame` by ADAR preference level.

    Rows whose "Mutation" equals `mutation` and whose `preference_column`
    is "High", "Intermediate" or "Low" are labelled with the level, the
    text "ADAR-like" and the mutation, separated by newlines. Every other
    row keeps its current "Mutation" value.

    :param frame: data frame with a "Mutation" column and `preference_column`
    :param mutation: mutation name to relabel, e.g. "A>G"
    :param preference_column: name of the column holding the preference level
    :return: array of labels, one per row of `frame`
    """
    labels = frame["Mutation"]
    is_mutation = frame["Mutation"] == mutation
    # The three level masks are mutually exclusive, so the order is irrelevant.
    for level in ("High", "Intermediate", "Low"):
        matches = is_mutation & (frame[preference_column] == level)
        labels = np.where(
            matches, "{0}\nADAR-like\n{1}".format(level, mutation), labels)
    return labels


def plots(input_dir, date, data_filter, virus, passage_order,
          transition_order, pairs, label_order, pairs_adar, filter_reads=None):
    """Draw transition and ADAR-preference plots per passage and save them.

    Four figures and two statistics files (``sts.csv``, ``sts_adar.csv``) are
    written to ``input_dir + date + "_plots"``.

    :param input_dir: path prefix the output directory name is appended to
    :param date: date string used in the output directory name
    :param data_filter: data frame with the columns "passage",
        "frac_and_weight", "Frequency", "Mutation", "Type",
        "5`_ADAR_Preference" and "3`_ADAR_Preference" (plus "Prob",
        "no_variants" and "Read_count" when `filter_reads` is True);
        its "passage" column is rewritten in place
    :param virus: name inserted into the figure file names
    :param passage_order: order of the passages on the x axis
    :param transition_order: hue order of the transition mutations
    :param pairs: pairs compared on the transition box plot
    :param label_order: unused; kept so existing callers keep working
    :param pairs_adar: pairs compared on the ADAR-preference box plot
    :param filter_reads: when exactly ``True``, zero "no_variants" where
        "Prob" < 0.95 and keep only rows with "Read_count" > 10000
    """
    output_dir = input_dir + date + "_plots"
    plus_minus = u"\u00B1"
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    if filter_reads is True:
        data_filter["no_variants"] = np.where(
            data_filter["Prob"] < 0.95, 0, data_filter["no_variants"])
        # Filter rows. The previous code assigned the filtered frame to the
        # "Read_count" column, which is not a row filter and is rejected by
        # current pandas.
        data_filter = data_filter[data_filter["Read_count"] > 10000].copy()

    # Prefix every passage with "p"; "RNA\nControl" keeps its name.
    data_filter["passage"] = data_filter["passage"].astype(str)
    data_filter["passage"] = np.where(
        data_filter["passage"] != "RNA\nControl",
        "p" + data_filter["passage"], data_filter["passage"])

    # Point plot of the transition mutations. x/y are passed by keyword
    # because recent seaborn releases no longer accept them positionally.
    g2 = sns.catplot(x="passage", y="frac_and_weight", data=data_filter,
                     hue="Mutation", order=passage_order,
                     palette=mutation_palette(4), kind="point", dodge=0.5,
                     hue_order=transition_order, join=False,
                     estimator=weighted_varaint, orient="v")
    g2.set_axis_labels("Passage",
                       "Variant Frequency {} CI=95%".format(plus_minus))
    g2.set(yscale='log')
    g2.set(ylim=(10 ** -6, 10 ** -2))
    g2.savefig(output_dir + "/Transition_Mutations_point_plot_{0}".format(virus),
               dpi=300)
    plt.close()

    # Box plot of the transition mutations with statistical annotations.
    passage_g = sns.boxplot(x="passage", y="Frequency", data=data_filter,
                            hue="Mutation", order=passage_order,
                            palette=mutation_palette(4), dodge=True,
                            hue_order=transition_order)
    passage_g.set_yscale('log')
    passage_g.set_ylim(10 ** -6, 10 ** -1)
    passage_g.set(xlabel="Passage", ylabel="Variant Frequency")

    annot = Annotator(passage_g, pairs, x="passage", y="Frequency",
                      hue="Mutation", data=data_filter, order=passage_order,
                      hue_order=transition_order)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    # Whatever annotate() prints to stdout is captured in the statistics file.
    file_path = output_dir + "/sts.csv"
    with open(file_path, "w") as o, contextlib.redirect_stdout(o):
        passage_g, _ = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(
        output_dir + "/Transition_Mutations_box_stat_plot_{0}".format(virus),
        dpi=300)
    plt.close()

    # Work on an explicit copy so the column assignments below do not target
    # a slice of `data_filter`.
    data_filter_synonymous = data_filter.loc[
        data_filter.Type == "Synonymous"].copy()
    # "Mutation" first receives the A>G labels; "Mutation_adar" is then built
    # on top of it, so it carries both the A>G and the U>C labels.
    data_filter_synonymous["Mutation"] = _adar_preference_labels(
        data_filter_synonymous, "A>G", "5`_ADAR_Preference")
    data_filter_synonymous["Mutation_adar"] = _adar_preference_labels(
        data_filter_synonymous, "U>C", "3`_ADAR_Preference")
    mutation_adar_order = ["High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
                           "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"]

    data_filter_synonymous["passage"] = \
        data_filter_synonymous["passage"].astype(str)
    catplot_adar = sns.catplot(x="passage", y="frac_and_weight",
                               data=data_filter_synonymous,
                               hue="Mutation_adar", order=passage_order,
                               palette=mutation_palette(4, adar=True),
                               kind="point", dodge=0.5,
                               hue_order=mutation_adar_order, join=False,
                               estimator=weighted_varaint, orient="v",
                               legend=True)
    catplot_adar.set_axis_labels(
        "Passage", "Variant Frequency {0} CI=95%".format(plus_minus))
    catplot_adar.set(yscale='log')
    catplot_adar.set(ylim=(10 ** -6, 10 ** -2))
    plt.savefig(
        output_dir + "/adar_pref_mutation_point_plot_{0}.png".format(virus),
        dpi=300)
    plt.close()

    # Box plot of the ADAR-preference groups with statistical annotations.
    adar_g = sns.boxplot(x="passage", y="Frequency",
                         data=data_filter_synonymous, hue="Mutation_adar",
                         order=passage_order,
                         palette=mutation_palette(4, adar=True), dodge=True,
                         hue_order=mutation_adar_order)
    adar_g.set_yscale('log')
    adar_g.set_ylim(10 ** -6, 10 ** -1)
    adar_g.set(xlabel="Passage", ylabel="Variant Frequency")
    annot = Annotator(adar_g, pairs_adar, x="passage", y="Frequency",
                      hue="Mutation_adar", data=data_filter_synonymous,
                      hue_order=mutation_adar_order, order=passage_order)
    annot.configure(test='t-test_welch', text_format='star', loc='outside',
                    verbose=2, comparisons_correction="Bonferroni")
    annot.apply_test()
    file_path = output_dir + "/sts_adar.csv"
    with open(file_path, "w") as o, contextlib.redirect_stdout(o):
        adar_g, _ = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(
        output_dir + "/adar_pref_mutation_box_plot_{0}.png".format(virus),
        dpi=300)
    plt.close()
Пример #9
0
def main():
    """Plot variant frequencies of the RV patient samples.

    Reads ``data_filter.pkl``, ``data_filter_ag.pkl`` and
    ``data_filter_uc.pkl`` from ``input_dir + prefix``, creates the output
    directory ``input_dir + "<date>_<prefix>"`` and saves the transition,
    ADAR-context and U>C-context figures plus ``sts.csv`` there. Finally the
    A>G frequencies are aggregated per ADAR_like/label/Type and printed.
    """
    input_dir = "/Users/odedkushnir/PhD_Projects/After_review/AccuNGS/RV/patients/"
    prefix = "inosine_predict_context_freq0.01"
    date = datetime.today().strftime("%Y%m%d")
    output_dir = input_dir + "{0}_{1}".format(date, prefix)
    try:
        os.mkdir(output_dir)
    except OSError:
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    data_filter = pd.read_pickle(input_dir + prefix + "/data_filter.pkl")
    data_filter_ag = pd.read_pickle(input_dir + prefix + "/data_filter_ag.pkl")
    data_filter_uc = pd.read_pickle(input_dir + prefix + "/data_filter_uc.pkl")
    data_filter["label"] = np.where(
        data_filter["label"] == "RNA Control\nPrimer ID", "RNA\nControl",
        data_filter["label"])

    # Plot orderings
    label_order = [
        "RNA\nControl", "p3 Cell Culture\nControl", "Patient-1", "Patient-4",
        "Patient-5", "Patient-9", "Patient-16", "Patient-17", "Patient-20"
    ]
    transition_order = ["A>G", "U>C", "G>A", "C>U"]
    type_order2 = ["Synonymous", "Non-Synonymous"]
    context_order_uc = ["UpA", "UpU", "UpG", "UpC"]
    type_order_ag = ["Synonymous", "Non-Synonymous", "NonCodingRegion"]
    adar_preference = ["High", "Intermediate", "Low"]
    plus_minus = u"\u00B1"
    # A>G is compared with each other transition within every label:
    # first all labels against G>A, then against U>C, then against C>U.
    pairs = [((label, "A>G"), (label, other))
             for other in ("G>A", "U>C", "C>U")
             for label in label_order]

    # Point plot of the transition mutations.
    g2 = sns.catplot(x="label",
                     y="frac_and_weight",
                     data=data_filter,
                     hue="Mutation",
                     order=label_order,
                     palette=mutation_palette(4),
                     kind="point",
                     dodge=0.5,
                     hue_order=transition_order,
                     join=False,
                     estimator=weighted_varaint,
                     orient="v",
                     legend=True)
    g2.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g2.set(yscale='log')
    g2.set(ylim=(10**-5, 10**-3))
    g2.set_xticklabels(fontsize=10, rotation=90)
    g2.savefig(output_dir + "/Transition_Mutations_point_plot", dpi=300)
    plt.close()

    # Box plot of the transition mutations with statistical annotations.
    data_filter["label"] = data_filter["label"].astype(str)
    data_filter["Frequency"] = data_filter["Frequency"].astype(float)
    passage_g = sns.boxplot(x="label",
                            y="Frequency",
                            data=data_filter,
                            hue="Mutation",
                            order=label_order,
                            palette=mutation_palette(4),
                            dodge=True,
                            hue_order=transition_order)
    passage_g.set_yscale('log')
    passage_g.set_ylim(10**-6, 10**-1)
    passage_g.set(xlabel="", ylabel="Variant Frequency")
    passage_g.set_xticklabels(labels=label_order, fontsize=10, rotation=90)

    annot = Annotator(passage_g,
                      pairs,
                      x="label",
                      y="Frequency",
                      hue="Mutation",
                      data=data_filter,
                      order=label_order,
                      hue_order=transition_order)
    annot.configure(test='t-test_welch',
                    text_format='star',
                    loc='outside',
                    verbose=2,
                    comparisons_correction="Bonferroni")
    annot.apply_test()
    # Whatever annotate() prints to stdout is captured in the statistics file.
    file_path = output_dir + "/sts.csv"
    with open(file_path, "w") as o, contextlib.redirect_stdout(o):
        passage_g, _ = annot.annotate()
    plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
    plt.tight_layout()
    plt.savefig(output_dir + "/Transition_Mutations_box_stat_plot_patients",
                dpi=300)
    plt.close()

    # A>G Prev Context. x/y are passed by keyword in every catplot call
    # below because recent seaborn releases no longer accept them
    # positionally.
    g5 = sns.catplot(x="label",
                     y="frac_and_weight",
                     data=data_filter_ag,
                     hue="ADAR_like",
                     order=label_order,
                     palette=mutation_palette(2),
                     kind="point",
                     dodge=True,
                     hue_order=[True, False],
                     estimator=weighted_varaint,
                     orient="v",
                     col="Type",
                     join=False,
                     col_order=type_order2)
    g5.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g5.set(yscale='log')
    g5.set(ylim=(7 * 10**-7, 4 * 10**-3))
    g5.set_xticklabels(rotation=90)
    g5.savefig(output_dir + "/Context_point_plot", dpi=300)
    plt.close()

    # A>G split by 5' ADAR preference, one column per mutation type.
    mutation_ag = sns.catplot(x="label",
                              y="frac_and_weight",
                              data=data_filter_ag,
                              hue="5`_ADAR_Preference",
                              palette=mutation_palette(3, adar=True, ag=True),
                              kind="point",
                              dodge=True,
                              estimator=weighted_varaint,
                              order=label_order,
                              orient="v",
                              col="Type",
                              join=False,
                              col_order=type_order_ag,
                              hue_order=adar_preference)
    mutation_ag.set(yscale="log")
    mutation_ag.set(ylim=(1 * 10**-5, 1 * 10**-2))
    mutation_ag.set_xticklabels(rotation=90)
    mutation_ag.fig.suptitle("A>G ADAR_like Mutation in RV patients", y=0.99)
    plt.subplots_adjust(top=0.85)
    mutation_ag.set_axis_labels(
        "", "Variant Frequency {} CI=95%".format(plus_minus))
    mutation_ag.savefig(output_dir + "/ag_ADAR_like_Mutation_col_patients.png",
                        dpi=300)
    plt.close()

    # Same as g5 but without splitting into columns by mutation type.
    g6 = sns.catplot(x="label",
                     y="frac_and_weight",
                     data=data_filter_ag,
                     hue="ADAR_like",
                     order=label_order,
                     palette=mutation_palette(2),
                     kind="point",
                     dodge=True,
                     hue_order=[True, False],
                     estimator=weighted_varaint,
                     orient="v",
                     join=False)
    g6.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g6.set(yscale='log')
    g6.set(ylim=(7 * 10**-7, 4 * 10**-3))
    g6.set_xticklabels(rotation=90)
    g6.savefig(output_dir + "/Context_point_all_mutations_type_plot", dpi=300)
    plt.close()

    # U>C split by the "Next" context column.
    g9 = sns.catplot(x="label",
                     y="frac_and_weight",
                     data=data_filter_uc,
                     hue="Next",
                     order=label_order,
                     palette="tab20",
                     hue_order=context_order_uc,
                     estimator=weighted_varaint,
                     orient="v",
                     dodge=True,
                     kind="point",
                     col="Type",
                     join=False,
                     col_order=type_order2)
    g9.set_axis_labels("", "Variant Frequency {} CI=95%".format(plus_minus))
    g9.set(yscale='log')
    g9.set(ylim=(10**-5, 10**-2))
    g9.set_xticklabels(rotation=90)
    g9.savefig(output_dir + "/UC_Context_point_plot", dpi=300)
    plt.close()

    # Aggregate the A>G frequencies per ADAR_like / label / Type.
    data_filter_ag_grouped = data_filter_ag.groupby(
        ["ADAR_like", "label",
         "Type"])["frac_and_weight"].agg(lambda x: weighted_varaint(x))
    data_filter_ag_grouped = data_filter_ag_grouped.reset_index()
    data_filter_ag_grouped = data_filter_ag_grouped.rename(
        columns={"frac_and_weight": "Frequency"})
    data_filter_ag_grouped["Frequency"] = data_filter_ag_grouped[
        "Frequency"].astype(float)
    print(data_filter_ag_grouped.to_string())

    data_filter_ag_grouped_silent = data_filter_ag_grouped[
        data_filter_ag_grouped["Type"] == "Synonymous"]
    # NOTE(review): the label literal below contains a non-ASCII combining
    # mark (U+05BF) after "Culture" - verify that it matches the labels
    # actually present in the data.
    data_filter_ag_grouped_silent = data_filter_ag_grouped_silent[
        data_filter_ag_grouped_silent["label"] == "Cell Cultureֿ\nControl"]
Пример #10
0
class Test(unittest.TestCase):
    """Exercise Annotator features that depend on the underlying _Plotter."""

    def setUp(self) -> None:
        # noinspection DuplicatedCode
        # Two observations for every (x, color) combination.
        observations = [
            ("a", 15, "blue"), ("a", 16, "blue"),
            ("b", 17, "blue"), ("b", 18, "blue"),
            ("a", 15, "red"), ("a", 16, "red"),
            ("b", 17, "red"), ("b", 18, "red"),
        ]
        records = {
            index: {'x': x, 'y': y, 'color': color}
            for index, (x, y, color) in enumerate(observations, start=1)
        }
        self.df = pd.DataFrame.from_dict(records).T
        self.df["y"] = self.df["y"].astype(float)
        self.plotting = dict(data=self.df, x="x", y="y", hue='color')

    @staticmethod
    def _group_pairs():
        """Return a fresh list of the three group pairs used by every test."""
        return [
            (("a", "blue"), ("a", "red")),
            (("b", "blue"), ("b", "red")),
            (("a", "blue"), ("b", "blue")),
        ]

    def test_dodge_false_raises(self):
        """dodge=False is rejected with a ValueError mentioning "dodge"."""
        axes = sns.barplot(dodge=False, **self.plotting)
        with self.assertRaisesRegex(ValueError, "dodge"):
            self.annotator = Annotator(
                axes, plot="barplot", dodge=False,
                pairs=self._group_pairs(), **self.plotting)

    def test_wrong_plotter_engine(self):
        """engine="plotly" raises NotImplementedError mentioning "plotly"."""
        axes = sns.barplot(**self.plotting)
        with self.assertRaisesRegex(NotImplementedError, "plotly"):
            self.annotator = Annotator(
                axes, engine="plotly", plot="barplot",
                pairs=self._group_pairs(), **self.plotting)

    def test_orient_horizontal(self):
        """Annotate a horizontal strip plot (x and y columns swapped)."""
        horizontal = dict(self.plotting)
        horizontal.update(orient='h', x='y', y='x', dodge=True)
        axes = sns.stripplot(**horizontal)
        self.annotator = Annotator(
            axes, pairs=self._group_pairs(), plot="stripplot", **horizontal)
        self.annotator.configure(test="Mann-Whitney")
        self.annotator.apply_and_annotate()

    def test_fixed_offset(self):
        """Annotate a bar plot with use_fixed_offset=True."""
        axes = sns.barplot(**self.plotting)
        self.annotator = Annotator(
            axes, pairs=self._group_pairs(), plot="barplot", **self.plotting)
        self.annotator.configure(use_fixed_offset=True, test="Mann-Whitney")
        self.annotator.apply_and_annotate()
Пример #11
0
def main():
    """Plot transition-mutation frequencies per passage for each replica.

    Loads the pickled variant tables from the hard-coded ``input_dir``,
    zeroes ``no_variants`` where ``Prob < 0.95``, keeps only rows with
    ``Read_count > 10000`` and then, for replicas 1, 2 and 3, writes into a
    dated output directory:

    * a point plot and a box plot of the four transition mutations per
      passage, the box plot annotated with Welch t-tests
      (Bonferroni-corrected) of A>G against G>A, U>C and C>U;
    * a point plot and a box plot of synonymous A>G / U>C mutations split by
      ADAR-preference class, the box plot annotated with High-vs-Low tests;
    * ``sts<replica>.csv`` / ``sts_adar_<replica>.csv`` holding whatever
      ``Annotator.annotate`` prints (stdout is redirected into these files).
    """
    # Local copy of the data. The lab-share location is
    # /Volumes/STERNADILABHOME$/volume3/okushnir/AccuNGS/20201008RV-202329127/merged/passages/
    input_dir = "/Users/odedkushnir/PhD_Projects/After_review/AccuNGS/RV/passages/"
    prefix = "inosine_predict_context"
    date = datetime.today().strftime("%Y%m%d")
    output_dir = input_dir + "{0}_{1}".format(date, prefix)
    try:
        os.mkdir(output_dir)
    except OSError:
        # Best effort: this also fires when the directory already exists.
        print("Creation of the directory %s failed" % output_dir)
    else:
        print("Successfully created the directory %s " % output_dir)

    data_filter = pd.read_pickle(input_dir + prefix + "/data_filter.pkl")
    data_filter_ag = pd.read_pickle(input_dir + prefix + "/data_filter_ag.pkl")
    data_filter_uc = pd.read_pickle(input_dir + prefix + "/data_filter_uc.pkl")
    data_filter["passage"] = data_filter["passage"].astype(int)
    data_filter["no_variants"] = np.where(data_filter["Prob"] < 0.95, 0,
                                          data_filter["no_variants"])
    # Row filter on coverage. The previous code assigned the filtered frame
    # to the "Read_count" column instead of filtering the rows.
    data_filter = data_filter[data_filter["Read_count"] > 10000].copy()

    # Plots
    label_order = [
        "RNA Control\nRND", "RNA Control\nPrimer ID", "p2-1", "p2-2", "p2-3",
        "p5-1", "p5-2", "p5-3", "p8-1", "p8-2", "p8-3", "p10-2", "p10-3",
        "p12-1", "p12-2", "p12-3"
    ]
    mutation_order = [
        "A>G", "U>C", "G>A", "C>U", "A>C", "U>G", "A>U", "U>A", "G>C", "C>G",
        "C>A", "G>U"
    ]
    transition_order = ["A>G", "U>C", "G>A", "C>U"]
    type_order = ["Synonymous", "Non-Synonymous", "Premature Stop Codon"]
    type_order_ag = ["Synonymous", "Non-Synonymous"]
    context_order = ["UpA", "ApA", "CpA", "GpA"]
    context_order_uc = ["UpU", "UpA", "UpC", "UpG"]
    adar_preference = ["High", "Intermediate", "Low"]
    plus_minus = u"\u00B1"

    replica_lst = [1, 2, 3]
    for replica in replica_lst:
        # Work on a private copy so the shared table is never mutated and
        # pandas does not warn about writing to a slice.
        data_filter_replica = data_filter.copy()
        data_filter_replica["passage"] = (
            "p" + data_filter_replica["passage"].astype(str))
        if replica == 2:
            # Re-tag the p0 rows as replica 2 so they pass the replica filter
            # below (presumably the control is stored under another replica
            # id -- confirm against the input table).
            data_filter_replica["replica"] = np.where(
                data_filter_replica["passage"] == "p0", 2,
                data_filter_replica["replica"])
        data_filter_replica = data_filter_replica[
            data_filter_replica["replica"] == replica].copy()
        # Passage 0 is displayed as the RNA control.
        data_filter_replica["passage"] = np.where(
            data_filter_replica["passage"] == "p0", "RNA\nControl",
            data_filter_replica["passage"])

        if replica == 1:
            passage_order = ["RNA\nControl", "p2", "p5", "p8", "p12"]
            # Replica 1 has no RNA-control comparison for the U>C ADAR
            # classes, matching the original hand-written pair list.
            uc_passages = passage_order[1:]
        else:
            passage_order = ["RNA\nControl", "p2", "p5", "p8", "p10", "p12"]
            uc_passages = passage_order

        # A>G against each other transition, within every passage.
        pairs = [((passage, "A>G"), (passage, other))
                 for other in ("G>A", "U>C", "C>U")
                 for passage in passage_order]
        # High vs Low ADAR preference, within every passage.
        pairs_adar = (
            [((passage, "High\nADAR-like\nA>G"),
              (passage, "Low\nADAR-like\nA>G"))
             for passage in passage_order] +
            [((passage, "High\nADAR-like\nU>C"),
              (passage, "Low\nADAR-like\nU>C"))
             for passage in uc_passages])

        passage_g = sns.catplot(x="passage",
                                y="frac_and_weight",
                                data=data_filter_replica,
                                hue="Mutation",
                                order=passage_order,
                                palette=mutation_palette(4),
                                kind="point",
                                dodge=0.5,
                                hue_order=transition_order,
                                join=False,
                                estimator=weighted_varaint,
                                orient="v",
                                legend=True)
        passage_g.set_axis_labels(
            "Passage", "Variant Frequency {} CI=95%".format(plus_minus))
        passage_g.set(yscale='log', ylim=(10**-6, 10**-2))
        plt.savefig(
            output_dir +
            "/Transition_Mutations_point_plot_RVB14_replica%s" % str(replica),
            dpi=300)
        plt.close()

        passage_g1 = sns.boxplot(x="passage",
                                 y="Frequency",
                                 data=data_filter_replica,
                                 hue="Mutation",
                                 order=passage_order,
                                 palette=mutation_palette(4),
                                 dodge=True,
                                 hue_order=transition_order)
        passage_g1.set_yscale('log')
        passage_g1.set_ylim(10**-6, 10**-2)
        passage_g1.set(xlabel="Passage", ylabel="Variant Frequency")
        annot = Annotator(passage_g1,
                          pairs,
                          x="passage",
                          y="Frequency",
                          hue="Mutation",
                          data=data_filter_replica,
                          order=passage_order,
                          hue_order=transition_order)
        annot.configure(test='t-test_welch',
                        text_format='star',
                        loc='outside',
                        verbose=2,
                        comparisons_correction="Bonferroni")
        annot.apply_test()
        file_path = output_dir + "/sts{0}.csv".format(replica)
        # Capture whatever annotate() prints into the statistics file.
        with open(file_path, "w") as o:
            with contextlib.redirect_stdout(o):
                passage_g1, test_results = annot.annotate()
        plt.legend(bbox_to_anchor=(1.05, 0.5), loc=2, borderaxespad=0.)
        plt.tight_layout()
        plt.savefig(
            output_dir +
            "/Transition_Mutations_box_stat_plot_RVB14_replica{0}".format(
                replica),
            dpi=300)
        plt.close()

        # ADAR preferences
        data_filter_replica_synonymous = data_filter_replica.loc[
            data_filter_replica.Type == "Synonymous"].copy()
        # Step 1: relabel A>G rows by their 5' ADAR preference. This
        # overwrites "Mutation" on the synonymous copy only.
        data_filter_replica_synonymous["Mutation"] = np.where(
            ((data_filter_replica_synonymous["Mutation"] == "A>G") &
             (data_filter_replica_synonymous["5`_ADAR_Preference"] == "High")),
            "High\nADAR-like\nA>G",
            np.where(
                ((data_filter_replica_synonymous["Mutation"] == "A>G")
                 & (data_filter_replica_synonymous["5`_ADAR_Preference"]
                    == "Intermediate")), "Intermediate\nADAR-like\nA>G",
                np.where(
                    ((data_filter_replica_synonymous["Mutation"] == "A>G") &
                     (data_filter_replica_synonymous["5`_ADAR_Preference"]
                      == "Low")), "Low\nADAR-like\nA>G",
                    data_filter_replica_synonymous["Mutation"])))
        # Step 2: relabel U>C rows by their 3' ADAR preference. Every other
        # row keeps the label produced by step 1, so "Mutation_adar" carries
        # both the A>G and the U>C classes.
        data_filter_replica_synonymous["Mutation_adar"] = np.where(
            ((data_filter_replica_synonymous["Mutation"] == "U>C") &
             (data_filter_replica_synonymous["3`_ADAR_Preference"] == "High")),
            "High\nADAR-like\nU>C",
            np.where(
                ((data_filter_replica_synonymous["Mutation"] == "U>C")
                 & (data_filter_replica_synonymous["3`_ADAR_Preference"]
                    == "Intermediate")), "Intermediate\nADAR-like\nU>C",
                np.where(
                    ((data_filter_replica_synonymous["Mutation"] == "U>C") &
                     (data_filter_replica_synonymous["3`_ADAR_Preference"]
                      == "Low")), "Low\nADAR-like\nU>C",
                    data_filter_replica_synonymous["Mutation"])))
        mutation_adar_order = [
            "High\nADAR-like\nA>G", "Low\nADAR-like\nA>G",
            "High\nADAR-like\nU>C", "Low\nADAR-like\nU>C"
        ]
        catplot_adar = sns.catplot(x="passage",
                                   y="frac_and_weight",
                                   data=data_filter_replica_synonymous,
                                   hue="Mutation_adar",
                                   order=passage_order,
                                   palette=mutation_palette(4, adar=True),
                                   kind="point",
                                   dodge=0.5,
                                   hue_order=mutation_adar_order,
                                   join=False,
                                   estimator=weighted_varaint,
                                   orient="v",
                                   legend=True)
        catplot_adar.set_axis_labels(
            "Passage", "Variant Frequency {} CI=95%".format(plus_minus))
        catplot_adar.set(yscale='log')
        catplot_adar.set(ylim=(10**-6, 10**-2))
        plt.savefig(
            output_dir +
            "/adar_pref_mutation_point_plot_RVB14_replica{0}.png".format(
                replica),
            dpi=300)
        plt.close()

        adar_g = sns.boxplot(x="passage",
                             y="Frequency",
                             data=data_filter_replica_synonymous,
                             hue="Mutation_adar",
                             order=passage_order,
                             palette=mutation_palette(4, adar=True),
                             dodge=True,
                             hue_order=mutation_adar_order)
        adar_g.set_yscale('log')
        adar_g.set_ylim(10**-6, 10**-1)
        adar_g.set(xlabel="Passage", ylabel="Variant Frequency")

        # Pass the same `order` used to draw the boxes so the annotator's
        # group positions agree with the plot.
        annot = Annotator(adar_g,
                          pairs_adar,
                          x="passage",
                          y="Frequency",
                          hue="Mutation_adar",
                          data=data_filter_replica_synonymous,
                          order=passage_order,
                          hue_order=mutation_adar_order)
        annot.configure(test='t-test_welch',
                        text_format='star',
                        loc='outside',
                        verbose=2,
                        comparisons_correction="Bonferroni")
        annot.apply_test()
        file_path = output_dir + "/sts_adar_{0}.csv".format(replica)
        with open(file_path, "w") as o:
            with contextlib.redirect_stdout(o):
                adar_g, test_results = annot.annotate()
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
        plt.tight_layout()
        plt.savefig(
            output_dir +
            "/adar_pref_mutation_box_stat_plot_RVB14_replica{0}".format(
                replica),
            dpi=300)
        plt.close()