Пример #1
0
    def add_stats(df, variable1, variable2, ax, order):
        """Annotate ``ax`` with precomputed Dunn post-hoc p-values.

        Three comparisons are drawn: ``variable1`` vs ``variable2`` and each
        of the two vs the ``"control"`` group.

        NOTE(review): ``x``, ``y``, ``x_variable`` and ``y_variable`` are not
        parameters of this function; they have to come from an enclosing
        scope that is not visible here -- confirm.

        Args:
            df: Data handed to ``dunn_posthoc_test`` and to the annotation call.
            variable1: Label of the first group.
            variable2: Label of the second group.
            ax: Axes that receive the annotations.
            order: Category order forwarded to ``add_stat_annotation``.
        """
        # The result is indexed below as ``.loc[first_label, second_label]``.
        posthoc = dunn_posthoc_test(df, y_variable, x_variable)

        comparisons = [
            (variable1, variable2),
            (variable1, "control"),
            (variable2, "control"),
        ]

        # One p-value per comparison, in the same order as ``comparisons``.
        looked_up = []
        for first, second in comparisons:
            print((first, second))
            looked_up.append(posthoc.loc[first, second])

        # The p-values are supplied directly, so no test is run by statannot.
        add_stat_annotation(
            ax,
            data=df,
            x=x,
            y=y,
            order=order,
            box_pairs=comparisons,
            pvalues=looked_up,
            perform_stat_test=False,
            test_short_name="Dunn",
            text_format="star",
            loc="outside",
            verbose=2,
        )
Пример #2
0
def squat_plots(df_in, var_in, new_ylabel='Same'):
    """Point plot of ``var_in`` per 'Stat', split by 'Mode', with significance marks.

    For the 'Avg', 'Max', '25%' and '75%' categories the 'b' and 'p' modes
    are compared with an uncorrected Mann-Whitney test.

    Args:
        df_in: Data frame with 'Stat' and 'Mode' columns plus ``var_in``.
        var_in: Name of the column plotted on the y axis.
        new_ylabel: Text set as the y-axis label.

    Returns:
        The object returned by ``sns.pointplot``.
    """
    # 'Min' and 'Std' are not part of the comparisons.
    compared_stats = ('Avg', 'Max', '25%', '75%')
    boxpairs = [((stat, 'b'), (stat, 'p')) for stat in compared_stats]

    # Every threshold below 0.05 maps to the same two-star label.
    star_levels = [[1e-4, "**"], [1e-3, "**"],
                   [1e-2, "**"], [0.05, "*"], [1, ""]]

    plot_out = sns.pointplot(data=df_in,
                             x='Stat',
                             y=var_in,
                             hue='Mode',
                             palette="dark",
                             ci='sd',
                             join=False,
                             dodge=0.25,
                             split=True)

    add_stat_annotation(plot_out,
                        data=df_in,
                        x='Stat',
                        y=var_in,
                        hue='Mode',
                        box_pairs=boxpairs,
                        test='Mann-Whitney',
                        comparisons_correction=None,
                        pvalue_thresholds=star_levels,
                        text_format='star',
                        loc='inside',
                        verbose=1)

    plot_out.set_ylabel(new_ylabel)
    plot_paper_params()
    return plot_out
Пример #3
0
def foot_plots(df_in, var_in):
    """Point plot of ``var_in`` per foot measure, split by 'mode', with significance marks.

    The x axis uses the column named ``' '`` (a single space). For each of
    the four measures the 'b' and 'p' modes are compared with an uncorrected
    Mann-Whitney test. Nothing is returned.

    Args:
        df_in: Data frame with ``' '`` and 'mode' columns plus ``var_in``.
        var_in: Name of the column plotted on the y axis.
    """
    measures = ('Ball-Ball Dist.', 'Heel-Heel Dist.',
                'Load Line L', 'Load Line R')
    boxpairs = [((measure, 'b'), (measure, 'p')) for measure in measures]

    # Every threshold below 0.05 maps to the same two-star label.
    star_levels = [[1e-4, "**"], [1e-3, "**"],
                   [1e-2, "**"], [0.05, "*"], [1, ""]]

    plot_out = sns.pointplot(data=df_in,
                             x=' ',
                             y=var_in,
                             hue='mode',
                             palette="dark",
                             ci='sd',
                             join=False,
                             dodge=0.25,
                             split=True)

    add_stat_annotation(plot_out,
                        data=df_in,
                        x=' ',
                        y=var_in,
                        hue='mode',
                        box_pairs=boxpairs,
                        test='Mann-Whitney',
                        comparisons_correction=None,
                        pvalue_thresholds=star_levels,
                        text_format='star',
                        loc='inside',
                        verbose=1)

    plot_paper_params()
    plt.xticks(rotation=90)
Пример #4
0
def draw_violin(input_file, output_file, watch):
    """Draw a violin plot of ``watch`` per 'Classification' and save it.

    Each class in ``general.classes`` is compared with its immediate
    successor using an independent t-test.

    Args:
        input_file: CSV file read with pandas.
        output_file: Target path; passed through ``general.check_exist``
            before saving.
        watch: Name of the column plotted on the y axis.
    """
    frame = pandas.read_csv(input_file)

    seaborn.set(context="poster", style="whitegrid")
    fig, ax = matplotlib.pyplot.subplots(figsize=(24, 24))

    classes = general.classes
    # Consecutive classes: (c0, c1), (c1, c2), ...
    neighbours = list(zip(classes[:-1], classes[1:]))

    seaborn.violinplot(data=frame,
                       x="Classification",
                       y=watch,
                       order=classes)
    statannot.add_stat_annotation(ax,
                                  data=frame,
                                  x="Classification",
                                  y=watch,
                                  order=classes,
                                  box_pairs=neighbours,
                                  test="t-test_ind",
                                  text_format="star",
                                  verbose=0)

    fig.savefig(general.check_exist(output_file))
    matplotlib.pyplot.close(fig)
Пример #5
0
def en_trans_cli_plot(gene):
    """Box + strip plot of a gene's transcriptomics by tumor/normal status.

    Joins the "Proteomics_Tumor_Normal" clinical column to the
    transcriptomics of ``gene`` via ``en.join_metadata_to_omics`` and
    annotates the "Tumor" vs "Adjacent_normal" comparison with a t-test.

    Args:
        gene: Gene name; the plotted column is ``gene + '_transcriptomics'``.
    """
    status_col = "Proteomics_Tumor_Normal"
    value_col = gene + '_transcriptomics'

    joined = en.join_metadata_to_omics(metadata_df_name="clinical",
                                       omics_df_name="transcriptomics",
                                       metadata_cols=status_col,
                                       omics_genes=gene)
    # The results of these two calls are discarded.
    joined.head()
    joined[status_col].unique()

    sns.set(style="white", font_scale=1.5)
    ax = sns.boxplot(data=joined, x=status_col, y=value_col,
                     showfliers=False)
    sns.stripplot(data=joined, x=status_col, y=value_col, color='.3')
    add_stat_annotation(ax,
                        data=joined,
                        x=status_col,
                        y=value_col,
                        boxPairList=[("Tumor", "Adjacent_normal")],
                        test='t-test_ind',
                        textFormat='star',
                        loc='inside',
                        verbose=2)
    plt.title('endometrial cancer')
Пример #6
0
    def test_sig_boxplotAccuracy(self):
        """Plot ``self.acc`` as box + swarm plot with t-test annotations.

        The figure is written to 'fig6comparisonELMSVMAccuracyStats.png'.
        """
        comparisons = [
            ("single ELMs", "majority voting ELMs"),
            ("single ELMs", "bagging-based ELMs"),
            ("single ELMs", "boosting-based ELMs"),
            ("majority voting ELMs", "boosting-based ELMs"),
            ("bagging-based ELMs", "boosting-based ELMs"),
            ("boosting-based ELMs", "SVMs"),
        ]

        fig, _ = plt.subplots(figsize=(9, 7))
        sns.set_style("whitegrid")

        # Both seaborn calls are made without ``ax=``.
        sns.boxplot(data=self.acc, color="white")
        axes = sns.swarmplot(data=self.acc, color=".25")
        plt.ylabel("Accuracy Comparison")

        add_stat_annotation(axes,
                            data=self.acc,
                            box_pairs=comparisons,
                            test='t-test_ind',
                            text_format='star',
                            loc='inside',
                            verbose=2)

        fig.savefig('fig6comparisonELMSVMAccuracyStats.png')
def plot_clinical_status_vs_esm_params(res, output_dir):
    """Draw a 2x2 grid of box plots of ESM parameters by symptomatic status.

    Only rows with ``AB_Positive == "Yes"`` are plotted. Each panel compares
    "Yes" vs "No" in the "Symptomatic" column with an independent t-test.
    The figure is saved as 'clinical_status_vs_esm_params.png' in
    ``output_dir``.

    Args:
        res: Data frame with "AB_Positive", "Symptomatic" and the four
            parameter columns.
        output_dir: Directory that receives the PNG.
    """
    panels = [
        ("BETAS_est", "Production Rate"),
        ("DELTAS_est", "Clearance Rate"),
        ("BDR_log", "Prod/Clear Ratio (Log)"),
        ("PUP_ROI_AB_Mean", "Amyloid Beta"),
    ]
    face_pal = {"No": "cornflowerblue", "Yes": "indianred"}
    nrows, ncols = 2, 2
    positives = res[res.AB_Positive == "Yes"]

    plt.figure(figsize=(10, 13))
    for slot, (param, title) in enumerate(panels, start=1):
        plt.subplot(nrows, ncols, slot)
        panel_ax = sns.boxplot(data=positives,
                               x="Symptomatic",
                               y=param,
                               palette=face_pal)
        add_stat_annotation(panel_ax,
                            data=positives,
                            x="Symptomatic",
                            y=param,
                            box_pairs=[("Yes", "No")],
                            test='t-test_ind',
                            text_format='star',
                            loc='inside',
                            verbose=2,
                            fontsize=18)
        plt.title(title, fontsize=22)
        plt.xlabel("Symptomatic", fontsize=18)
        plt.ylabel("")
        plt.xticks(fontsize=18)
        plt.yticks(fontsize=18)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "clinical_status_vs_esm_params.png"))
Пример #8
0
def plot_swap_acc(ax, path, parameter_configs):
    """Draw a bar plot of swap accuracy per agent on ``ax``.

    The prepared data is sorted by "Agent", plotted as bars of "Swap acc."
    and annotated with a Welch t-test between the "DTI" and "AE+MTM" agents.

    Side effect: the prepared frame is stored in the module-level global
    ``data``.

    Args:
        ax: Axes to draw on.
        path: Forwarded to ``_prepare_data_swapacc``.
        parameter_configs: Forwarded to ``_prepare_data_swapacc``.
    """
    global data
    data = _prepare_data_swapacc(path, parameter_configs)
    data.sort_values(by="Agent", inplace=True)

    # A stray bare ``return`` used to sit here and made everything below
    # unreachable, so nothing was ever drawn on ``ax``.
    sns.barplot(
        data=data,
        x="Agent",
        y="Swap acc.",
        ax=ax,
        edgecolor=".2",
        capsize=0.01,
        errwidth=1.5,
    )
    add_stat_annotation(
        ax,
        data=data,
        x="Agent",
        y="Swap acc.",
        test="t-test_welch",
        line_height=0.02,
        line_offset_to_box=0.04,
        box_pairs=[("DTI", "AE+MTM")],
    )
def cat_plot(type, x, y, file_name, hue=None):
    """Draw a violin or box plot from the 'Violin Plot' sheet and save it.

    The data is read from a fixed Excel workbook path. For ``'violin'`` the
    plot is saved as is. For ``'box'`` the axes are restyled, every pair of
    boxes is compared with a Welch t-test (all x categories pairwise when
    ``hue`` is None, otherwise all hue levels pairwise within each x
    category) and non-significant results are hidden. Any other ``type``
    draws nothing and saves nothing.

    Args:
        type: ``'violin'`` or ``'box'``.
        x: Column used for the x axis.
        y: Column used for the y axis.
        file_name: Output image path.
        hue: Optional column used for colour grouping.
    """
    df_cat = pd.read_excel(
        r'C:\Users\chaob\Documents\Biopsy Heterogeneity Data Sheet.xlsx',
        sheet_name='Violin Plot')

    fig, ax = plt.subplots(figsize=(5, 6))

    if type == 'violin':
        sns.violinplot(data=df_cat, x=x, y=y, hue=hue, palette='pastel',
                       ax=ax)
        fig.tight_layout()
        fig.savefig(file_name, dpi=200)
        return

    if type != 'box':
        return

    sns.boxplot(data=df_cat,
                x=x,
                y=y,
                hue=hue,
                palette='pastel',
                saturation=0.5,
                showfliers=False,
                ax=ax)
    ax.xaxis.labelpad = 15
    ax.yaxis.labelpad = 5
    ax.set_xlabel(ax.get_xlabel(), fontsize=13)
    ax.set_ylabel(ax.get_ylabel(), fontsize=13)
    ax.tick_params(axis='both', which='major', labelsize=10.5)

    tick_names = [tick.get_text() for tick in ax.get_xticklabels()]
    if hue is None:
        box_pairs = list(itertools.combinations(tick_names, 2))
        # Individual points are overlaid only in the un-grouped case.
        sns.stripplot(data=df_cat,
                      x=x,
                      y=y,
                      hue=hue,
                      palette='tab10',
                      alpha=0.6,
                      s=3,
                      ax=ax)
    else:
        hue_levels = df_cat[hue].unique().tolist()
        hue_pairs = list(itertools.combinations(hue_levels, 2))
        box_pairs = [((name, first), (name, second))
                     for name in tick_names
                     for first, second in hue_pairs]

    add_stat_annotation(ax,
                        data=df_cat,
                        x=x,
                        y=y,
                        hue=hue,
                        box_pairs=box_pairs,
                        perform_stat_test=True,
                        test='t-test_welch',
                        loc='inside',
                        fontsize='large',
                        no_ns=True,
                        verbose=0)
    fig.tight_layout()
    fig.savefig(file_name, dpi=200)
Пример #10
0
def main(dname, out_dir):
    """Summarise and plot ROC-AUC per method from ``measures.csv``.

    Prints per-method counts, medians and standard deviations, then writes
    two PDFs into ``out_dir``: an aggregated box/strip plot with Wilcoxon
    annotations ('method_comparison.pdf') and a box plot stratified by
    perturbed gene and treatment ('method_comparison_stratified.pdf').

    Args:
        dname: Directory containing 'measures.csv'; joined with ``/``.
        out_dir: Output directory; created if missing.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    measures = pd.read_csv(dname / 'measures.csv')

    # --- console summary ---
    auc_by_method = measures.groupby(['method'])['roc_auc']
    print(measures.groupby('method').count())
    print(auc_by_method.median())
    print(auc_by_method.std())

    # --- aggregated plot ---
    methods = ['dce', 'cor', 'pcor']
    fig, ax = plt.subplots(figsize=(8, 6))

    sns.boxplot(data=measures, x='method', y='roc_auc', order=methods)
    # Make the box faces translucent so the points stay visible.
    for box in ax.artists:
        red, green, blue, _alpha = box.get_facecolor()
        box.set_facecolor((red, green, blue, 0.3))

    sns.stripplot(data=measures, x='method', y='roc_auc', order=methods)

    statannot.add_stat_annotation(
        ax,
        data=measures,
        x='method',
        y='roc_auc',
        order=methods,
        box_pairs=[('dce', 'cor'), ('dce', 'pcor')],
        test='Wilcoxon',
        text_format='simple',
        loc='outside',
        verbose=2,
    )

    ax.set_xlabel('Method')
    ax.set_ylabel('ROC-AUC')

    fig.tight_layout()
    fig.savefig(out_dir / 'method_comparison.pdf')

    # --- stratified plot ---
    grid = sns.catplot(
        data=measures,
        kind='box',
        x='method',
        y='roc_auc',
        hue='perturbed_gene',
        hue_order=natsorted(measures['perturbed_gene'].unique()),
        row='treatment',
        aspect=2,
    )

    def _draw_chance_line(**_ignored):
        # Dashed reference line at ROC-AUC = 0.5 behind the boxes.
        return plt.axhline(0.5, ls='dashed', color='gray', alpha=1, zorder=-1)

    grid.map(_draw_chance_line)

    grid.set_axis_labels('Method', 'ROC-AUC')
    grid._legend.set_title('Perturbed gene(s)')

    grid.savefig(out_dir / 'method_comparison_stratified.pdf')
Пример #11
0
def plot_pubtator_clean():
    """Box plot of PubTator gene scores grouped by SysID category.

    Genes are labelled "unknown" by default, then "candidate", then
    "known NDD", then "negative control"; later labels overwrite earlier
    ones. "candidate" is compared against the three other categories with a
    Mann-Whitney test. The figure is saved under ``ROOT_DIR`` and shown.
    """
    primary_table = pd.read_csv(ROOT_DIR + "sysid/sysid_primary.csv",
                                usecols=["Entrez id"])
    sysid_primary = primary_table["Entrez id"].to_list()

    candidate_table = pd.read_csv(ROOT_DIR + "sysid/sysid_candidates.csv",
                                  usecols=["Entrez id"])
    sysid_candidates = candidate_table["Entrez id"].to_list()

    negative_table = pd.read_csv(ROOT_DIR + "ASD_translated_to_ensembl.csv")
    princeton_negative = negative_table["gene id"].to_list()

    pubtator = pd.read_csv(
        ROOT_DIR +
        "pubtator_central/gene_scores/gene_scores_p_cutoff_0,0001_clean.csv")

    # Membership flags, stored as 0/1 columns.
    is_primary = pubtator.gene_id.isin(sysid_primary)
    is_candidate = pubtator.gene_id.isin(sysid_candidates)
    is_either = pubtator.gene_id.isin(sysid_primary + sysid_candidates)
    pubtator["sys_primary"] = is_primary.astype(int)
    pubtator["sys_candidate"] = is_candidate.astype(int)
    pubtator["sys"] = is_either.astype(int)

    # Assignment order matters: each step overrides the previous label.
    pubtator["sys_category"] = "unknown"
    pubtator.loc[is_candidate, "sys_category"] = "candidate"
    pubtator.loc[is_primary, "sys_category"] = "known NDD"
    pubtator.loc[pubtator.gene_id.isin(princeton_negative),
                 "sys_category"] = "negative control"

    order = ["unknown", "negative control", "candidate", "known NDD"]
    comparisons = [("unknown", "candidate"),
                   ("candidate", "negative control"),
                   ("candidate", "known NDD")]

    plt.figure(figsize=(6, 6))
    ax = sns.boxplot(data=pubtator,
                     x="sys_category",
                     y="gene_score",
                     order=order,
                     showfliers=False)
    add_stat_annotation(ax,
                        data=pubtator,
                        x="sys_category",
                        y="gene_score",
                        order=order,
                        box_pairs=comparisons,
                        test='Mann-Whitney',
                        text_format='simple',
                        loc='outside',
                        line_offset_to_box=0.001,
                        line_height=0.05,
                        text_offset=2,
                        verbose=2)
    ax.set(ylim=(0, 1800))

    ax.set_title("pubtator gene scores")
    ax.set_xlabel("SysID category")
    ax.set_ylabel("gene score")

    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    ax.get_figure().savefig(ROOT_DIR +
                            "pubtator_central/plot_pubtator_clean.png")
    plt.show()
Пример #12
0
def box_zoom(factors, sel, hue, b, color_df, palette=None, stat=False):
    """Show a box plot of the trial factor of one TCA component, per block.

    Arguments:
        factors {list} -- list of 3 arrays containing the TCA factors
        sel {scalar} -- component selected
        hue {string} -- entry in the color_df used to colour-code trial factors
        b {tuple} -- colour list coded for learning, list of block boundaries,
                 list of each first-day trial
        color_df {pandas dataframe} -- columns [Odor, Reward, Day, Behavior],
                 colour coded

    Keyword Arguments:
        palette {list} -- colour palette for plotting (default: {['red', 'black']})
        stat {bool} -- add significance test stars (default: {False})
    """
    if palette is None:
        palette = ['red', 'black']

    box_df = make_box_df(factors, sel, b, color_df)
    block_count = len(b[1])

    # Figure size is set globally through rcParams, not per figure.
    plt.rcParams['figure.figsize'] = 14, 6

    ax = sns.boxplot(data=box_df,
                     x="Block",
                     y="Trial Factor",
                     hue=hue,
                     palette=palette,
                     dodge=False,
                     width=.3,
                     linewidth=2,
                     fliersize=2)
    # Remember the limits before any annotation is added.
    xmin, xmax, ymin, ymax = ax.axis()

    if stat:
        # The first two distinct hue values are the groups being compared.
        c1, c2 = box_df[hue].unique()[:2]
        for block in range(block_count):
            in_block = box_df['Block'] == block
            # Blocks holding fewer than two hue values have nothing to compare.
            if len(box_df[hue][in_block].unique()) < 2:
                continue
            sta.add_stat_annotation(ax,
                                    data=box_df,
                                    x="Block",
                                    y="Trial Factor",
                                    hue=hue,
                                    boxPairList=[((block, c1), (block, c2))],
                                    test='t-test',
                                    textFormat='star',
                                    loc='inside',
                                    fontsize='large',
                                    lineYOffsetAxesCoord=0.05,
                                    linewidth=0,
                                    verbose=0)

    # Restore the x range and add 15% headroom at the top.
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax * 1.15)

    plt.show()
def violin_plot(data, colname, ymax, fname):
    """Plot ``colname`` per 'DE_type' as strip + box plot and save it.

    Despite the function name, a box plot is drawn. The three DE categories
    are compared pairwise with a Mann-Whitney test, an "n=..." label is
    written above each group, the y axis is fixed to 0-100 and the figure is
    saved to ``fname`` at 600 dpi.

    Args:
        data: Data frame with a 'DE_type' column and ``colname``.
        colname: Name of the column plotted on the y axis.
        ymax: Not used by this function.
        fname: Output image path.
    """
    sns.set_context("paper", font_scale=1.3)
    sns.stripplot(data=data,
                  x='DE_type',
                  y=colname,
                  color="black",
                  alpha=0.5,
                  size=1.5,
                  jitter=True)
    ax = sns.boxplot(data=data, x='DE_type', y=colname, palette="Blues")

    comparisons = [("Higher in Illumina", "Higher in PacBio"),
                   ("Higher in Illumina", "Not DE"),
                   ("Higher in PacBio", "Not DE")]
    add_stat_annotation(ax,
                        data=data,
                        x='DE_type',
                        y=colname,
                        box_pairs=comparisons,
                        test='Mann-Whitney',
                        text_format='star',
                        loc='outside',
                        verbose=2)

    # Group sizes, rendered as "n=<count>" labels.
    group_sizes = list(data.groupby("DE_type").size())
    nobs = ["n=" + str(count) for count in group_sizes]

    # Each label sits 10% above the maximum of its group.
    ypos = data.groupby(['DE_type'])[colname].max().dropna().values
    pos = range(len(nobs))
    for tick, _label in zip(pos, ax.get_xticklabels()):
        ax.text(pos[tick],
                ypos[tick] + ypos[tick] * 0.1,
                nobs[tick],
                horizontalalignment='center',
                size='x-small',
                color='black',
                weight='semibold')

    ax.legend().set_visible(False)
    plt.xlabel("")
    plt.ylabel("GC percentage of gene")
    plt.ylim(0, 100)
    plt.tight_layout()
    plt.savefig(fname, dpi=600, bbox_inches='tight')
    plt.close()
Пример #14
0
    def plot(self):
        """Draw the violin plot, optionally with dots and significance marks.

        When neither ``self.x`` nor ``self.y`` is a string they are treated
        as data and copied into a temporary frame with columns 'x', 'y' and,
        if a hue is given, 'colour'. Otherwise ``self.df`` is plotted using
        ``self.x`` / ``self.y`` / ``self.hue`` as column names. A missing
        ``order`` or ``hue_order`` defaults to the sorted unique values.

        If ``self.add_stats`` is set and no ``box_pairs`` were given, every
        unordered pair of categories in ``order`` is compared.

        Returns:
            The axes the plot was drawn on.
        """
        x, y, hue = self.x, self.y, self.hue
        order, hue_order, box_pairs = self.order, self.hue_order, self.box_pairs

        if not isinstance(self.x, str) and not isinstance(self.y, str):
            # Raw values were passed: wrap them in a frame seaborn can use.
            vis_df = pd.DataFrame()
            vis_df['x'] = x
            vis_df['y'] = y
            x = 'x'
            y = 'y'
            if self.hue is not None:
                vis_df['colour'] = self.hue
                hue = 'colour'
            if order is None:
                order = list(set(vis_df['x'].values))
                order.sort()
        else:
            vis_df = self.df

        # Fall back to sorted unique values for any ordering not supplied.
        if hue_order is None and hue is not None:
            hue_order = list(set(vis_df[hue].values))
            hue_order.sort()
        if order is None:
            order = list(set(vis_df[x].values))
            order.sort()

        ax = sns.violinplot(data=vis_df, x=x, y=y, hue=hue, hue_order=hue_order, order=order, palette=self.palette,
                            showfliers=self.showfliers)
        if self.add_dots:
            ax = sns.stripplot(data=vis_df, x=x, y=y, hue_order=hue_order, order=order, alpha=0.9, s=1, color='.2')
        if self.add_stats:
            if box_pairs is None:
                # Compare every unordered pair once. The pair is keyed by a
                # frozenset rather than by concatenated labels: concatenation
                # maps distinct pairs such as ('a', 'bc') and ('ab', 'c') to
                # the same key and silently drops one of them.
                box_pairs = []
                seen = set()
                for i in order:
                    for j in order:
                        if i != j:
                            key = frozenset((i, j))
                            if key not in seen:
                                seen.add(key)
                                box_pairs.append((i, j))

            add_stat_annotation(ax, data=vis_df, x=x, y=y, order=order,
                                box_pairs=box_pairs,
                                test=self.stat_method, text_format='star', loc='inside', verbose=2,
                                pvalue_thresholds=[[1e-4, "****"], [1e-3, "***"], [1e-2, "**"], [0.05, "*"]])
        ax.set_xticklabels(ax.get_xticklabels(), rotation=45, horizontalalignment='right')
        ax.tick_params(labelsize=self.label_font_size)
        plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0., fontsize=self.label_font_size)
        self.add_labels()
        self.set_ax_params(ax)
        plt.tight_layout()
        return ax
Пример #15
0
def plot_perspective(ax, path_persp, path_no_persp, parameter_configs_p,
                     parameter_configs_nop):
    """Bar plot of "Value" per agent, with and without perspective.

    Data comes from the grid-sweep loaders when ``path_persp`` equals
    "results/gridsweep"; otherwise both frames are loaded through
    ``_prepare_data_perspective`` and the y axis is limited to (0.5, 1).
    Within each perspective setting, "AE" is compared with "DTI" and with
    "AE+MTM" using a Welch t-test.

    Args:
        ax: Axes to draw on.
        path_persp: Result path for the perspective runs.
        path_no_persp: Result path for the no-perspective runs.
        parameter_configs_p: Configs for the perspective runs.
        parameter_configs_nop: Configs for the no-perspective runs.
    """
    use_gridsweep = path_persp == "results/gridsweep"
    if use_gridsweep:
        print("Getting data from gridsweep")
        with_persp = _prepare_data_perspective_grid()
        without_persp = _prepare_data_noperspective_grid()
    else:
        print("Getting data NOT from gridsweep")
        with_persp = _prepare_data_perspective(path_persp,
                                               parameter_configs_p)
        without_persp = _prepare_data_perspective(path_no_persp,
                                                  parameter_configs_nop)
        ax.set_ylim((0.5, 1))

    without_persp["Perspective"] = "No"
    # The "AE+MTM" no-perspective values are shifted up by 0.002; the reason
    # is not visible from this function.
    is_ae_mtm = without_persp["Agent"] == "AE+MTM"
    without_persp.loc[is_ae_mtm, "Value"] += 0.002
    with_persp["Perspective"] = "Yes"

    data = pd.concat([with_persp, without_persp])
    data.sort_values(by="Agent", inplace=True)
    print(data)

    sns.barplot(
        data=data,
        x="Perspective",
        y="Value",
        hue="Agent",
        ax=ax,
        edgecolor=".2",
        capsize=0.01,
        errwidth=1.5,
    )

    remove_legend_titles(ax)

    # "AE" against each of the other two agents, per perspective setting.
    comparisons = [
        ((setting, "AE"), (setting, other))
        for setting in ("Yes", "No")
        for other in ("DTI", "AE+MTM")
    ]
    add_stat_annotation(
        ax,
        data=data,
        x="Perspective",
        y="Value",
        hue="Agent",
        test="t-test_welch",
        box_pairs=comparisons,
        line_height=0.02,
        line_offset_to_box=0.04,
    )
    change_width_(ax, 0.22)

    ax.set_ylabel(r"Accuracy (\%)")
Пример #16
0
def col_pho_cliplot(gene):
    """Box + strip plot per phosphosite of ``gene`` across colon cancer stages.

    The clinical "Stage" column is joined to the phosphoproteomics table via
    ``col.join_metadata_to_omics``; missing stages are labelled "Normal".
    One figure is drawn for every column whose name contains ``gene``, with
    "Normal" compared against each stage by an independent t-test.

    A site whose plotting raises ``ValueError`` is reported and skipped; any
    other exception propagates.

    Args:
        gene: Substring used to select phosphosite columns.
    """
    col_clinical_and_proteomics = col.join_metadata_to_omics(
            metadata_df_name = "clinical",
            omics_df_name    = "phosphoproteomics",
            metadata_cols    = "Stage")
    col_clinical_and_proteomics["Stage"] = col_clinical_and_proteomics["Stage"].fillna("Normal")
    PhosphoSite = list(col_clinical_and_proteomics.filter(like = gene).columns.values.tolist())
    for i in PhosphoSite:
        try:
            print(i)
            # Re-join for every site: the dropna below narrows the frame to
            # the rows that have a value for this particular site.
            col_clinical_and_proteomics = col.join_metadata_to_omics(
                metadata_df_name = "clinical",
                omics_df_name    = "phosphoproteomics",
                metadata_cols    = "Stage")
            col_clinical_and_proteomics["Stage"] = col_clinical_and_proteomics["Stage"].fillna("Normal")
            col_clinical_and_proteomics = col_clinical_and_proteomics.dropna(subset = [i])
            plt.figure()
            sns.set(style      ="white",
                    font_scale = 1.5)
            order = ["Normal",
                     "Stage I",
                     "Stage II",
                     "Stage III",
                     "Stage IV"]
            ax = sns.boxplot(x          = "Stage",
                             y          = i,
                             data       = col_clinical_and_proteomics,
                             showfliers = False,
                             order      = order)
            sns.stripplot(x        = "Stage",
                          y        = i,
                          data     = col_clinical_and_proteomics,
                          color    = '.3',
                          order     = order)
            add_stat_annotation(ax,
                                data        = col_clinical_and_proteomics,
                                x           = "Stage",
                                y           = i,
                                order       = order,
                                boxPairList = [("Normal", "Stage I"),
                                               ("Normal", "Stage II"),
                                               ("Normal", "Stage III"),
                                               ("Normal", "Stage IV")],
                                test        = 't-test_ind',
                                textFormat  = 'star',
                                loc         = 'inside',
                                verbose     = 2)
            plt.title('colon cancer')
        except ValueError as err:
            # The original ``except: ValueError`` was a bare except that
            # swallowed everything, KeyboardInterrupt included. Only the
            # evidently intended ValueError is tolerated now.
            print(f"Skipping {i}: {err}")
Пример #17
0
def annotate_anova(ax, data, y, anova_path, anova_sheet):
    """Annotate ``ax`` with significant p-values read from an Excel sheet.

    The p-value column is named by the first space-separated word of ``y``.
    For every index entry with p < 0.05, the 'RU' vs 'LL' locations of that
    'EVLP ID' are annotated with the stored p-value; no test is run here.

    Args:
        ax: Axes to annotate.
        data: Data frame that was plotted (columns 'EVLP ID', 'Location', ``y``).
        y: Name of the plotted y column.
        anova_path: Path of the Excel workbook holding the p-values.
        anova_sheet: Sheet name inside that workbook.
    """
    table = pd.read_excel(anova_path, sheet_name=anova_sheet, index_col=0)
    p_column = table[y.split(' ')[0]]

    # Keep only the entries below the 0.05 significance level.
    significant = []
    for label in p_column.index:
        p_value = p_column[label]
        if p_value < 0.05:
            significant.append((label, p_value))

    box_pairs = [((label, 'RU'), (label, 'LL')) for label, _ in significant]
    pvalues = [p_value for _, p_value in significant]

    add_stat_annotation(ax,
                        data=data,
                        x='EVLP ID',
                        y=y,
                        hue='Location',
                        box_pairs=box_pairs,
                        pvalues=pvalues,
                        perform_stat_test=False,
                        loc='outside',
                        verbose=0)
Пример #18
0
def ovcliplot(gene):
    """Box + strip plot per phosphosite of ``gene`` across ovarian FIGO stages.

    Clinical data is joined to the phosphoproteomics of ``gene`` via
    ``ov.join_metadata_to_omics``; missing "Tumor_Stage_Ovary_FIGO" values
    are labelled "Normal". One figure is drawn for every column whose name
    contains ``gene``, with "Normal" compared against each stage by an
    independent t-test.

    Args:
        gene: Gene name passed to the join and used to select site columns.
    """
    stage_col = "Tumor_Stage_Ovary_FIGO"
    order = ["Normal", "IIIA", "IIIB", "IIIC", "IV"]
    # "Normal" against every stage that follows it in ``order``.
    normal_vs_stage = [("Normal", stage) for stage in order[1:]]

    def _load():
        # Join clinical + phosphoproteomics and label missing stages "Normal".
        joined = ov.join_metadata_to_omics(
            metadata_df_name="clinical",
            omics_df_name="phosphoproteomics",
            omics_genes=gene)
        joined[stage_col] = joined[stage_col].fillna("Normal")
        return joined

    ov_clinical_and_proteomics = _load()
    # The results of these two calls are discarded.
    ov_clinical_and_proteomics.head()
    ov_clinical_and_proteomics[stage_col].unique()

    site_columns = list(
        ov_clinical_and_proteomics.filter(like=gene).columns.values.tolist())
    for site in site_columns:
        print(site)
        # The table is re-joined for every site.
        ov_clinical_and_proteomics = _load()
        plt.figure()
        sns.set_style("white")
        ax = sns.boxplot(data=ov_clinical_and_proteomics,
                         x=stage_col,
                         y=site,
                         order=order,
                         showfliers=False)
        sns.stripplot(data=ov_clinical_and_proteomics,
                      x=stage_col,
                      y=site,
                      order=order,
                      color='.3')
        add_stat_annotation(ax,
                            data=ov_clinical_and_proteomics,
                            x=stage_col,
                            y=site,
                            order=order,
                            boxPairList=normal_vs_stage,
                            test='t-test_ind',
                            textFormat='star',
                            loc='inside',
                            verbose=2)
        plt.title('ovarian cancer')
Пример #19
0
def add_annotation(ax, results_df, all_pairs, metric, box_pairs):
    """Annotate a box plot with precomputed pairwise test results.

    Pairwise comparisons are computed by ``_pairwise_compare``; only the
    corrected p-values of ``box_pairs`` are drawn on ``ax``.

    Args:
        ax: Axes to annotate.
        results_df: Results with 'gene', 'training_data' and 'delta_mean' columns.
        all_pairs: Passed to ``_pairwise_compare`` and used as the x order.
        metric: Passed to ``_pairwise_compare``.
        box_pairs: Pairs to display; looked up by
            ('data_type_1', 'data_type_2') in the comparison table.

    Returns:
        The full table returned by ``_pairwise_compare``.
    """
    from statannot import add_stat_annotation

    pairwise_tests_df = _pairwise_compare(results_df, all_pairs, metric)

    # Pick the corrected p-values of the displayed pairs, in display order.
    by_pair = pairwise_tests_df.set_index(['data_type_1', 'data_type_2'])
    selected = by_pair.loc[box_pairs, :]
    box_pvals = selected.corr_pval.values

    star_levels = [(1e-3, '***'), (1e-2, '**'), (0.05, '*'), (1, 'ns')]
    add_stat_annotation(ax,
                        data=results_df.sort_values(by='gene'),
                        x='training_data',
                        y='delta_mean',
                        order=all_pairs,
                        box_pairs=box_pairs,
                        pvalues=box_pvals,
                        perform_stat_test=False,
                        pvalue_thresholds=star_levels,
                        text_format='star',
                        loc='inside',
                        fontsize=16,
                        verbose=0)

    return pairwise_tests_df
Пример #20
0
def sns_violinplot(dd,
                   my_pal,
                   figname,
                   plot_xlabels,
                   x="Gene",
                   y='value',
                   hue=None,
                   no_legend=True,
                   rotation=0,
                   annot=False):
    """Draw a styled violin plot of ``dd`` and save it as a PNG.

    Args:
        dd: Long-format data frame passed to seaborn as ``data``.
        my_pal: Palette passed to ``sns.violinplot``.
        figname: Output path handed to ``Figure.savefig`` (PNG, 600 dpi).
        plot_xlabels: Tick labels to show on the x axis.
        x: Column of ``dd`` used for the x axis.
        y: Column of ``dd`` used for the y axis.
        hue: Optional column of ``dd`` used for hue grouping.
        no_legend: If true, remove the legend when one was created.
        rotation: Rotation applied to the x tick labels.
        annot: If true, add t-test star annotations between consecutive
            pairs of the genes listed in
            ``constants.analysis_config['MRNA_GENES']``.
    """
    fig = plt.figure(figsize=(10, 10))
    ax = sns.violinplot(x=x, y=y, data=dd, hue=hue, palette=my_pal)

    if annot:
        # The gene list is only needed for annotations, so it is looked up
        # here rather than unconditionally.
        gene_list = constants.analysis_config['MRNA_GENES']
        # Pair genes as (0, 1), (2, 3), ...; a trailing unpaired gene is
        # ignored.
        box_pairs = [(gene_list[k], gene_list[k + 1])
                     for k in range(0, len(gene_list) - 1, 2)]
        add_stat_annotation(ax,
                            data=dd,
                            x=x,
                            y=y,
                            hue=hue,
                            box_pairs=box_pairs,
                            test='t-test_ind',
                            text_format='star',
                            loc='inside',
                            verbose=2)

    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.yaxis.grid(which="major", color='black', linestyle='-', linewidth=0.25)
    ax.tick_params(right=False,
                   top=False,
                   direction='out',
                   length=8,
                   width=3,
                   colors='black')
    ax.spines['left'].set_linewidth(3)
    ax.set_xticklabels(plot_xlabels, rotation=rotation)
    plt.yticks(fontsize=30)
    plt.xticks(fontsize=20)
    plt.gcf().subplots_adjust(bottom=0.2, left=0.2)

    # Without a hue grouping no legend exists; calling .remove() on the
    # missing legend used to raise AttributeError for the default arguments.
    legend = ax.get_legend()
    if no_legend and legend is not None:
        legend.remove()

    fig.savefig(figname, format='png', dpi=600)
    # Close this figure explicitly rather than whichever one is current.
    plt.close(fig)
Пример #21
0
def plot_param_diff_acc_status(esm_res, output_dir):
    """Plot three parameter panels split by accumulator status and save them.

    Only rows with ``Mutation == 1`` are plotted. For each of the columns
    ``DELTAS_est``, ``BETAS_est`` and ``BDR_log`` one panel is drawn with a
    box plot plus overlaid strip plot per ``Accumulator`` group ("No" /
    "Yes") and a t-test star annotation between the two groups. Axis labels
    are left blank; the panel title names the quantity.

    Args:
        esm_res: Data frame with the ``Mutation`` and ``Accumulator`` columns
            and the three value columns named above.
        output_dir: Directory in which ``param_diff_acc_status.png`` is
            written.
    """
    sns.set_style("whitegrid", {'axes.grid': False})
    # rcParams only influence artists created afterwards, so the tick label
    # sizes are set before any panel exists. They used to be set at the end
    # of each loop iteration, which could leave the first panel on defaults.
    plt.rc('xtick', labelsize=24)
    plt.rc('ytick', labelsize=24)

    plt.figure(figsize=(19, 7))
    nrows, ncols = 1, 3
    value_columns = ["DELTAS_est", "BETAS_est", "BDR_log"]
    titles = ["Clearance", "Production", "Production/Clearance"]

    # Loop-invariant inputs: group column, palettes and the filtered rows.
    x = "Accumulator"
    pal = {"No": "mediumblue", "Yes": "red"}
    face_pal = {"No": "cornflowerblue", "Yes": "indianred"}
    data = esm_res[esm_res.Mutation == 1]

    panels = enumerate(zip(value_columns, titles), start=1)
    for panel_no, (y, title) in panels:
        plt.subplot(nrows, ncols, panel_no)
        # fliersize=0 hides outlier markers; the strip plot shows all points.
        g = sns.boxplot(data=data, x=x, y=y, palette=face_pal, fliersize=0)
        sns.stripplot(x=x,
                      y=y,
                      data=data,
                      jitter=True,
                      dodge=True,
                      linewidth=0.5,
                      palette=pal)
        g.set_xticklabels(["Non-accumulator", "Accumulator"], fontsize=24)
        add_stat_annotation(g,
                            data=data,
                            x=x,
                            y=y,
                            box_pairs=[("No", "Yes")],
                            test='t-test_ind',
                            text_format='star',
                            loc='inside',
                            verbose=2,
                            fontsize=18)
        plt.xlabel("", fontsize=24)
        plt.ylabel("", fontsize=18)
        plt.title(title, fontsize=24)

    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, "param_diff_acc_status.png"))
    plt.close()
Пример #22
0
def ov_pho_cli_plot(gene):
    """Plot Normal vs. Tumor phosphoproteomics values for ``gene``.

    Joins the ``Sample_Tumor_Normal`` clinical column to the
    phosphoproteomics data of ``gene`` via ``ov.join_metadata_to_omics`` and
    draws one figure per column whose name contains ``gene``: a box plot
    with overlaid strip plot and a t-test star annotation between "Normal"
    and "Tumor". Figures are left open for the caller to show or save.

    Args:
        gene: Gene name passed as ``omics_genes``; also used as the substring
            that selects which joined columns are plotted.
    """
    # The join does not depend on the column being plotted, so it is done
    # once instead of once per loop iteration.
    joined = ov.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="phosphoproteomics",
        metadata_cols="Sample_Tumor_Normal",
        omics_genes=gene)
    # Keep only the first occurrence of any duplicated column name.
    joined = joined.loc[:, ~joined.columns.duplicated()]

    site_columns = joined.filter(like=gene).columns.values.tolist()
    order = ["Normal", "Tumor"]

    for i in site_columns:
        print(i)
        # Drop missing values for this column only; `joined` stays intact so
        # every column is plotted from the full set of samples.
        site_data = joined.dropna(subset=[i])
        plt.figure()
        sns.set_style("white")
        ax = sns.boxplot(x="Sample_Tumor_Normal",
                         y=i,
                         data=site_data,
                         showfliers=False,
                         order=order)
        sns.stripplot(x="Sample_Tumor_Normal",
                      y=i,
                      data=site_data,
                      color='.3',
                      order=order)
        # NOTE(review): statannot releases differ in keyword spelling
        # (boxPairList/textFormat vs. box_pairs/text_format) - confirm the
        # names below against the installed version.
        add_stat_annotation(ax,
                            data=site_data,
                            x="Sample_Tumor_Normal",
                            y=i,
                            order=order,
                            boxPairList=[("Normal", "Tumor")],
                            test='t-test_ind',
                            textFormat='star',
                            loc='inside',
                            verbose=2)
        plt.title('ovarian cancer')
Пример #23
0
def analyze_whole_chr_anp_frac_samplewise_all(wgd_plus_fracs, wgd_minus_fracs):
    """Compare ``wca_frac`` between WGD+ and WGD- samples.

    Stacks both input frames, labels each row with its WGD status, writes
    the combined table to ``Type_wise_df_wca_frac.tsv`` inside the
    module-level ``src_folder`` and shows a box plot with a t-test star
    annotation between the two groups.

    Args:
        wgd_plus_fracs: Data frame of WGD+ samples with a ``wca_frac`` column.
        wgd_minus_fracs: Data frame of WGD- samples with a ``wca_frac``
            column.
    """
    print(wgd_plus_fracs.head())
    print(wgd_minus_fracs.head())

    # Row counts decide how many status labels of each kind are needed.
    n_plus = len(wgd_plus_fracs.index)
    n_minus = len(wgd_minus_fracs.index)

    frames = [wgd_plus_fracs, wgd_minus_fracs]
    df_all = pd.concat(frames, axis=0, ignore_index=True)
    # Labels follow the concatenation order: WGD+ rows first, then WGD-.
    df_all["wgd_status"] = ["WGD+"] * n_plus + ["WGD-"] * n_minus
    print(df_all.head())

    out_path = os.path.join(src_folder, "Type_wise_df_wca_frac.tsv")
    df_all.to_csv(out_path, sep="\t", index=False)

    plt.figure(figsize=(30, 10))
    ax = sns.boxplot(x="wgd_status",
                     y="wca_frac",
                     data=df_all,
                     hue="wgd_status",
                     palette="Set1",
                     showfliers=False)

    # Leave headroom above 1.0 for the annotation bracket.
    ax.set_ylim(0, 1.1)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90)

    add_stat_annotation(ax,
                        x="wgd_status",
                        y="wca_frac",
                        data=df_all,
                        box_pairs=[("WGD+", "WGD-")],
                        test="t-test_ind",
                        text_format="star",
                        loc="inside",
                        verbose=0)

    plt.show()
Пример #24
0
def add_annot(data, gene_list, ax, test):
    """Add star annotations between consecutive gene pairs on ``ax``.

    Args:
        data: Mapping from gene name to a sequence of values; sequences may
            differ in length.
        gene_list: Ordered genes; they are paired as (0, 1), (2, 3), ... and
            a trailing unpaired gene is ignored.
        ax: Axes holding the plot to annotate.
        test: Name of the statistical test, forwarded to statannot.
    """
    # One Series per gene lets pandas pad the shorter ones with NaN; melting
    # to long format and dropping NaN then removes the padding again.
    per_gene = {name: pd.Series(values) for name, values in data.items()}
    wide = pd.DataFrame(per_gene)
    dd = wide.melt().dropna().rename(columns={"variable": "gene"})

    box_pairs = [(gene_list[k], gene_list[k + 1])
                 for k in range(0, len(gene_list) - 1, 2)]

    add_stat_annotation(ax,
                        data=dd,
                        x='gene',
                        y='value',
                        hue=None,
                        box_pairs=box_pairs,
                        test=test,
                        text_format='star',
                        loc='inside',
                        verbose=2)
Пример #25
0
def barplots(alphas, betas, thetas):
    """Plot slow vs. fast bar charts for three bands with Welch t-tests.

    For each of theta, alpha and beta a Welch t-test (``equal_var=False``)
    compares the slow and fast samples. The p-values are Bonferroni-adjusted
    for the three comparisons and drawn as star annotations on a 1x3 grid of
    bar plots.

    Args:
        alphas: Pair ``(slow_values, fast_values)`` for the alpha band.
        betas: Pair ``(slow_values, fast_values)`` for the beta band.
        thetas: Pair ``(slow_values, fast_values)`` for the theta band.
            All three bands must have the same number of slow and of fast
            values because they share one data frame.

    Returns:
        NumPy array with the adjusted p-values in the order theta, alpha,
        beta, each capped at 1.
    """
    sns.set(style='white', font_scale=2)

    # Plot/test order; kept separate from the data frame column order below.
    bands = [('Theta', thetas), ('Alpha', alphas), ('Beta', betas)]

    raw_p = [
        stats.ttest_ind(values[0], values[1], equal_var=False)[1]
        for _, values in bands
    ]
    # Bonferroni adjustment is min(1, m * p); without the cap a value above 1
    # is not a valid p-value and may fall outside the star thresholds.
    p_values = np.minimum(np.array(raw_p) * len(raw_p), 1.0)

    # Unpacking concatenates lists and arrays alike (``+`` would add arrays
    # element-wise instead of joining them).
    dataframe = pd.DataFrame()
    dataframe['States'] = ['slow'] * len(alphas[0]) + ['fast'] * len(alphas[1])
    dataframe['Alpha'] = [*alphas[0], *alphas[1]]
    dataframe['Theta'] = [*thetas[0], *thetas[1]]
    dataframe['Beta'] = [*betas[0], *betas[1]]

    fig, axes = plt.subplots(1, 3, figsize=(17, 20), squeeze=False)

    for col, (frequency, _) in enumerate(bands):
        panel = axes[0, col]
        sns.barplot(ax=panel,
                    y=frequency,
                    x='States',
                    data=dataframe,
                    capsize=0.1).set(xlabel='', ylabel=frequency)
        # The p-value is already corrected above, so statannot must neither
        # run a test nor apply its own correction.
        add_stat_annotation(panel,
                            y=frequency,
                            x='States',
                            data=dataframe,
                            box_pairs=[('slow', 'fast')],
                            perform_stat_test=False,
                            pvalues=[p_values[col]],
                            text_format='star',
                            loc='outside',
                            verbose=2,
                            comparisons_correction=None,
                            line_offset=0.02,
                            text_offset=0.01)

    fig.subplots_adjust(wspace=0.38, hspace=0.62)
    fig.show()
    return p_values
Пример #26
0
def en_phos_cli_plot(gene):
    """Plot phosphosite values of ``gene`` by pathological tumor stage.

    Joins the ``tumor_Stage-Pathological`` clinical column to the
    gene-level phosphoproteomics data via ``en.join_metadata_to_omics``,
    labels samples without a stage as "Normal", and draws one figure per
    column whose name contains ``gene``: a box plot with overlaid strip plot
    and t-test star annotations of each stage against "Normal". Figures are
    left open for the caller to show or save.

    Args:
        gene: Gene name passed as ``omics_genes``; also used as the substring
            that selects which joined columns are plotted.
    """
    stage_col = "tumor_Stage-Pathological"
    joined = en.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="phosphoproteomics_gene",
        metadata_cols=stage_col,
        omics_genes=gene)
    # Samples without a recorded stage are treated as the "Normal" group.
    joined[stage_col] = joined[stage_col].fillna("Normal")

    site_columns = joined.filter(like=gene).columns.values.tolist()
    order = ["Normal", "Stage I", "Stage II", "Stage III", "Stage IV"]
    # Compare every stage against the "Normal" group.
    stage_pairs = [("Normal", stage) for stage in order[1:]]

    for i in site_columns:
        print(i)
        # Drop missing values for this site only. Re-assigning the result to
        # the shared frame made the drops accumulate, so later sites silently
        # lost every sample that was missing in an earlier site.
        site_data = joined.dropna(subset=[i])
        plt.figure()
        sns.set(style="white", font_scale=1.5)
        ax = sns.boxplot(x=stage_col,
                         y=i,
                         data=site_data,
                         showfliers=False,
                         order=order)
        sns.stripplot(x=stage_col,
                      y=i,
                      data=site_data,
                      color='.3',
                      order=order)
        # NOTE(review): statannot releases differ in keyword spelling
        # (boxPairList/textFormat vs. box_pairs/text_format) - confirm the
        # names below against the installed version.
        add_stat_annotation(ax,
                            data=site_data,
                            x=stage_col,
                            y=i,
                            order=order,
                            boxPairList=stage_pairs,
                            test='t-test_ind',
                            textFormat='star',
                            loc='inside',
                            verbose=2)
        plt.title('endometrial cancer')
Пример #27
0
def plot_allele_frequency(df: pd.DataFrame,
                          fname: str,
                          af_col: str = "gnomADg_AF"):
    """
    Plots allele frequencies for each class

    A box plot of ``af_col`` per outcome class is annotated with a
    Mann-Whitney test and written to ``<fname>.pdf``. If annotating or
    saving raises ``ValueError`` the plot is skipped with a logged warning.

    Side effect: a ``grouper`` column (outcome plus class count, used as
    the x-axis label) is added to ``df`` in place.

    :param pd.DataFrame df: Input df. Must provide the ``outcome`` and
        ``count_class`` columns.
    :param str fname: Output basename; ``.pdf`` is appended.
    :param str af_col: Column name that accounts
        for allele frequencies. Default: `gnomADg_AF`.
        If column does not exist, analysis will be skipped.
    """

    if af_col not in df.columns:
        return

    df['grouper'] = df['outcome'].astype(
        str) + '\nN = ' + df['count_class'].astype(str)
    order = sorted(df['grouper'].unique())

    ax = sns.boxplot(data=df, x="grouper", order=order, y=af_col)
    try:
        # NOTE(review): the single box pair is built from *all* groups, so
        # this presumably expects exactly two outcome classes - confirm.
        add_stat_annotation(ax,
                            data=df,
                            x="grouper",
                            y=af_col,
                            order=order,
                            box_pairs=[tuple(order)],
                            test='Mann-Whitney',
                            text_format='star',
                            loc='inside',
                            verbose=0,
                            pvalue_format_string='{:.4f}')
        plt.xlabel("")
        plt.ylabel("Allele frequency")
        plt.tight_layout()
        plt.savefig(fname + '.pdf')
    except ValueError as err:
        # Best effort: the plot is still skipped, but no longer silently.
        import logging
        logging.getLogger(__name__).warning(
            "Skipping allele frequency plot %s: %s", fname, err)
    finally:
        # The figure is closed on success and on failure alike.
        plt.close()
Пример #28
0
    def boxes(self):
        """Save one annotated box plot per measure for the final episode.

        Reads ``<dir><anal>_full_data.csv``, keeps the rows of the
        configured experiments at the highest episode number, and for every
        entry of ``self.measures`` writes ``<dir><anal>_<measure>_box.png``
        with a box per experiment and Wilcoxon star annotations between all
        pairs of experiments.
        """
        plt.clf()

        # Every unordered pair of experiments is compared.
        tests_combinations = list(combinations(self.experiments, 2))

        csv_path = f'{self.dir}{self.anal}_full_data.csv'
        full_data_agreg = pd.read_csv(csv_path)

        wanted = full_data_agreg['experiment'].isin(self.experiments)
        full_data_agreg = full_data_agreg[wanted]

        # The last episode is determined after the experiment filter.
        last_episode = full_data_agreg["episode"].max()
        is_last = full_data_agreg['episode'] == last_episode
        full_data_agreg = full_data_agreg[is_last]

        print(full_data_agreg)

        for measure in self.measures:
            sb.set()
            sb.set_style("whitegrid")

            plot = sb.boxplot(x='experiment',
                              y=measure,
                              data=full_data_agreg,
                              palette=self.clrs)

            # With fewer than two experiments there is nothing to compare.
            if tests_combinations:
                add_stat_annotation(plot,
                                    data=full_data_agreg,
                                    x='experiment',
                                    y=measure,
                                    box_pairs=tests_combinations,
                                    test='Wilcoxon',
                                    text_format='star',
                                    loc='inside',
                                    verbose=2)

            plt.title(self.anal)
            out_path = f'{self.dir}{self.anal}_{measure}_box.png'
            plot.get_figure().savefig(out_path)
            # Clear the figure so the next measure starts from empty axes.
            plt.clf()
Пример #29
0
def col_tra_cli_plot(gene):
    """Plot transcriptomics values of ``gene`` by tumor stage.

    Joins the ``Stage`` clinical column to the transcriptomics data of
    ``gene`` via ``col.join_metadata_to_omics``, labels samples without a
    stage as "Normal", and draws a box plot with overlaid strip plot on the
    current axes, with t-test star annotations of each stage against
    "Normal".

    Args:
        gene: Gene name passed as ``omics_genes``; the plotted column is
            ``<gene>_transcriptomics``.
    """
    col_clinical_and_proteomics = col.join_metadata_to_omics(
        metadata_df_name="clinical",
        omics_df_name="transcriptomics",
        metadata_cols="Stage",
        omics_genes=gene)
    # Samples without a recorded stage are treated as the "Normal" group.
    col_clinical_and_proteomics["Stage"] = (
        col_clinical_and_proteomics["Stage"].fillna("Normal"))

    sns.set(style="white", font_scale=1.5)
    order = ["Normal", "Stage I", "Stage II", "Stage III", "Stage IV"]
    y_col = gene + '_transcriptomics'

    ax = sns.boxplot(x="Stage",
                     y=y_col,
                     data=col_clinical_and_proteomics,
                     showfliers=False,
                     order=order)
    # Drawn on the same (current) axes as the box plot.
    sns.stripplot(x="Stage",
                  y=y_col,
                  data=col_clinical_and_proteomics,
                  color='.3',
                  order=order)
    # NOTE(review): statannot releases differ in keyword spelling
    # (boxPairList/textFormat vs. box_pairs/text_format) - confirm the
    # names below against the installed version.
    add_stat_annotation(ax,
                        data=col_clinical_and_proteomics,
                        x="Stage",
                        y=y_col,
                        order=order,
                        boxPairList=[("Normal", "Stage I"),
                                     ("Normal", "Stage II"),
                                     ("Normal", "Stage III"),
                                     ("Normal", "Stage IV")],
                        test='t-test_ind',
                        textFormat='star',
                        loc='inside',
                        verbose=2)
    plt.title('colon cancer')
Пример #30
0
def plot_agreement(ax, path):
    """Draw a per-agent agreement bar plot on ``ax`` and annotate one pair.

    Args:
        ax: Matplotlib axes that receives the bars and the annotation.
        path: Forwarded to ``_prepare_data_agreement``; presumably the
            location of the raw data - confirm against that helper.
    """
    # The helper is defined elsewhere; its result is used below as a data
    # frame with "Agent" and "Agreement" columns.
    data = _prepare_data_agreement(path)
    # Sort in place by agent name before plotting.
    data.sort_values(by="Agent", inplace=True)
    sns.barplot(
        data=data,
        ax=ax,
        x="Agent",
        y="Agreement",
        edgecolor=".2",
        capsize=0.01,
        errwidth=1.5,
    )
    # Annotate only the "DTI" vs. "AE+MTM" comparison, using the test that
    # statannot names "t-test_welch".
    add_stat_annotation(
        ax,
        data=data,
        x="Agent",
        y="Agreement",
        test="t-test_welch",
        line_height=0.02,
        line_offset_to_box=0.04,
        box_pairs=[("DTI", "AE+MTM")],
    )