categories=(
        site_stat_df.groupby("site")["num_assigned_cells"]
        .sum()
        .sort_values(ascending=False)
        .index.tolist()
    ),
)
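
# The orphaned "categories=(...)" fragment above appears to be the tail of a
# pd.Categorical(...) call that orders the "site" factor by total assigned
# cells. A minimal sketch of that idiom, using only names already present in
# this snippet (the exact wrapper call is an assumption):
site_order = (
    site_stat_df.groupby("site")["num_assigned_cells"]
    .sum()
    .sort_values(ascending=False)
    .index.tolist()
)
num_unique_pert_df["site"] = pd.Categorical(
    num_unique_pert_df["site"], categories=site_order, ordered=True
)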

unique_pert_count_gg = (
    gg.ggplot(num_unique_pert_df, gg.aes(x="site", y="pert_count"))
    + gg.geom_bar(gg.aes(fill="pert_class"), stat="identity")
    + gg.theme_bw()
    + gg.theme(axis_text_x=gg.element_text(rotation=90, size=5))
    + gg.xlab("Sites")
    + gg.ylab("Perturbation Count")
    + gg.scale_fill_discrete(name="Perturbation Class")
)

output_file = pathlib.Path(
    output_figuresdir, "all_cellpainting_unique_perturbations_across_sites.png"
)
if check_if_write(output_file, force, throw_warning=True):
    unique_pert_count_gg.save(output_file, dpi=300, width=10, height=7, verbose=False)
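
# check_if_write is a project helper whose definition is not shown in this
# excerpt. Below is a minimal sketch of the overwrite guard it presumably
# implements; the behaviour is an assumption, not the project's actual code:
import warnings


def check_if_write(output_file, force, throw_warning=False):
    """Return True when writing is safe: the file is missing or force is set."""
    if force or not pathlib.Path(output_file).exists():
        return True
    if throw_warning:
        warnings.warn(f"{output_file} already exists; skipping (pass force=True to overwrite)")
    return False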

# Process overall perturbation counts per batch
pert_count_df = pd.concat(pert_counts_list, axis="rows").reset_index()

# Output a full count of perturbations per site
output_file = pathlib.Path(
    output_resultsdir, "complete_perturbation_count_per_site.tsv.gz"
)
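
# The actual write call falls outside this excerpt; a minimal sketch of how
# the compressed TSV would typically be written, reusing the same overwrite
# guard as above (assumed, not the original code):
if check_if_write(output_file, force, throw_warning=True):
    # compression is inferred by pandas from the ".gz" suffix
    pert_count_df.to_csv(output_file, sep="\t", index=False)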

# Example 2

             strip_background=gg.element_rect(colour="black", fill="#fdfff4")))

file = pathlib.Path("figures", "predictions",
                    "wt_parental_single_cell_proba.png")
wt_gg.save(file, height=3, width=6, dpi=400)

wt_gg

# In[10]:

resistant_clone_gg = (
    gg.ggplot(scores_df.query("Metadata_clone_number == 'WT parental'"),
              gg.aes(y="Clone A", x="Clone E")) +
    gg.geom_point(gg.aes(fill="Metadata_clone_number"), size=0.01, alpha=0.2) +
    gg.facet_grid("data_fit~shuffle_label") + gg.xlab("Clone E") +
    gg.ylab("Clone A") + gg.theme_bw() + gg.scale_fill_discrete(guide=False) +
    gg.theme(strip_text=gg.element_text(size=6, color="black"),
             strip_background=gg.element_rect(colour="black", fill="#fdfff4")))

file = pathlib.Path("figures", "predictions", "resistant_clone_scatter.png")
resistant_clone_gg.save(file, height=6, width=4, dpi=400)

resistant_clone_gg

# In[11]:

other_treatment_gg = (
    gg.ggplot(scores_df.query("data_fit == 'othertreatment'"),
              gg.aes(y="Clone A", x="treatment_label")) +
    gg.geom_boxplot(gg.aes(fill="Metadata_clone_number")) +
    gg.facet_grid("data_fit~shuffle_label") + gg.xlab("Bortezomib dose") +
# In[19]:


# Visualize UMAP results
clone_facet_gg = (
    gg.ggplot(embedding_df, gg.aes('x', 'y'))
    + gg.geom_point(
        gg.aes(fill='factor(Metadata_Plate)', shape="Metadata_treatment"),
        alpha=0.6
    )
    + gg.theme_bw()
    + gg.xlab("UMAP X")
    + gg.ylab("UMAP Y")
    + gg.scale_shape_manual(name="Treatment", values=[".", "+"])
    + gg.scale_fill_discrete(name="Plate")
    + gg.facet_wrap("~Metadata_clone_number")
    + gg.ggtitle("Four Clone Dataset - Merged")
    + gg.theme(
        legend_key=gg.element_rect(color="black", fill="white"),
        strip_text=gg.element_text(size=6, color="black"),
        strip_background=gg.element_rect(colour="black", fill="#fdfff4")
    )
)
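
# save_file_extensions is defined earlier in the original notebook and is not
# part of this excerpt; a plausible (assumed) value so the save loop below is
# self-contained:
save_file_extensions = [".png", ".svg"]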
    
file = os.path.join("figures", "umap", "four_clone_umap_facet_clone_sample")
for extension in save_file_extensions:
    clone_facet_gg.save(filename='{}{}'.format(file, extension), height=4, width=4.5, dpi=400)

clone_facet_gg

# Example 4

    document=full_paper_dataset.document.tolist(),
)
print(full_dataset.shape)

full_dataset.to_csv(
    Path("output/paper_dataset") / Path("paper_dataset_full_tsne.tsv"),
    sep="\t",
    index=False,
)

full_dataset.head()
# -

g = (p9.ggplot(full_dataset.sample(10000, random_state=100)) +
     p9.aes(x="dim1", y="dim2", fill="journal") + p9.geom_point() +
     p9.scale_fill_discrete(guide=False))
print(g)

# # Generate Bin plots

# ## Square Plot

data_df = pd.read_csv(
    Path("output") / Path("paper_dataset") /
    Path("paper_dataset_full_tsne.tsv"),
    sep="\t",
)
print(data_df.shape)
data_df.head()

data_df.describe()
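
# A minimal sketch of one way to build the square-bin plot announced above:
# cut the two t-SNE dimensions into a square grid with pd.cut and draw the
# per-bin paper counts with geom_tile. The bin count and layout here are
# illustrative assumptions, not the settings of the original figure.
bin_df = (
    data_df.assign(
        dim1_bin=pd.cut(data_df["dim1"], bins=50),
        dim2_bin=pd.cut(data_df["dim2"], bins=50),
    )
    .groupby(["dim1_bin", "dim2_bin"], observed=True)
    .size()
    .reset_index(name="paper_count")
    .assign(
        dim1_center=lambda df: df["dim1_bin"].apply(lambda b: b.mid).astype(float),
        dim2_center=lambda df: df["dim2_bin"].apply(lambda b: b.mid).astype(float),
    )
)

square_bin_gg = (
    p9.ggplot(bin_df, p9.aes(x="dim1_center", y="dim2_center", fill="paper_count"))
    + p9.geom_tile()
    + p9.theme_bw()
)
print(square_bin_gg)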

# Example 5

def make_bar_chart(survey_data, topic, facet_by=[], proportional=False):
    """Make a barchart showing the number of respondents listing each 
        column that starts with topic for a single year. If facet_by is
        not empty, the resulting plot will be faceted into subplots 
        by the variables given. 

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey   
        topic (str): String that all questions of interest start with 
        facet_by (list,optional): List of columns use for grouping
        proportional (bool, optiona ): Defaults to False. If True,
            the bars heights are determined proportionally to the 
            total number of responses in that facet. 

    Returns:
        (plotnine.ggplot): Plot object which can be displayed in a notebook or saved out to a file
    """
    show_legend = False
    if facet_by:
        show_legend = True

    topic_data_long = get_single_year_data_subset(survey_data, topic, facet_by)

    x = topic_data_long.columns.tolist()
    x.remove("level_1")

    if facet_by:
        period = False
        if "." in facet_by:
            facet_by.remove(".")
            period = True

        aggregate_data = (
            topic_data_long[topic_data_long.rating == 1]
            .dropna()
            .groupby(["level_0"] + facet_by)
            .count()
            .reset_index()
        )

        if period:
            facet_by.append(".")

    else:
        aggregate_data = (
            topic_data_long[topic_data_long.rating == 1]
            .dropna()
            .groupby("level_0")
            .count()
            .reset_index()
        )

    if proportional and facet_by:
        period = False
        if "." in facet_by:
            facet_by.remove(".")
            period = True

        facet_sums = (
            topic_data_long[topic_data_long.rating == 1]
            .dropna()
            .groupby(facet_by)
            .count()
            .reset_index()
        )

        aggregate_data = aggregate_data.merge(facet_sums, on=facet_by).rename(
            columns={"level_0_x": "level_0"}
        )
        aggregate_data = aggregate_data.assign(
            rating=aggregate_data.rating_x / aggregate_data.rating_y
        )

        if period:
            facet_by.append(".")

    br = (
        p9.ggplot(aggregate_data, p9.aes(x="level_0", fill="level_0", y="rating"))
        + p9.geom_bar(show_legend=show_legend, stat="identity")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        + p9.scale_x_discrete(
            limits=topic_data_long["level_0"].unique().tolist(),
            labels=[
                "\n".join(
                    textwrap.wrap(x.replace(topic, "").replace("_", " "), width=35)[0:2]
                )
                for x in topic_data_long["level_0"].unique().tolist()
            ],
        )
    )

    if facet_by:
        br = (
            br
            + p9.facet_grid(
                facet_by, shrink=False, labeller=lambda x: "\n".join(textwrap.wrap(x, 15))
            )
            + p9.theme(
                axis_text_x=p9.element_blank(),
                strip_text_x=p9.element_text(
                    wrap=True, va="bottom", margin={"b": -0.5}
                ),
            )
            + p9.scale_fill_discrete(
                limits=topic_data_long["level_0"].unique().tolist(),
                labels=[
                    "\n".join(
                        textwrap.wrap(
                            x.replace(topic, "")
                            .replace("_", " ")
                            .replace("/", "/  ")
                            .strip(),
                            30,
                        )
                    )
                    for x in topic_data_long["level_0"].unique().tolist()
                ],
            )
        )
    return br
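
# Example usage of make_bar_chart (the DataFrame, topic prefix, and facet
# column below are hypothetical placeholders, not names from the real survey):
#
#   chart = make_bar_chart(
#       survey_df,                       # raw survey responses
#       topic="contributing_barriers_",  # shared prefix of the question columns
#       facet_by=["experience_level"],   # one subplot per experience level
#       proportional=True,               # per-facet shares instead of raw counts
#   )
#   chart.save("barriers_by_experience.png", dpi=300)
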
gg_rep_act.save(os.path.join(dir_output, 'gg_rep_act.png'), width=8, height=4)

di_notes = {
    'chi2': 'χ2-correction',
    'insig': 'Erroneous',
    'specification': 'Specification',
    'non-replicable': 'Inconsistent'
}
# (ii) Breakdown of counts
tmp = acc_tt.merge(
    res_fisher.tt.value_counts().reset_index().rename(columns={
        'index': 'tt',
        'tt': 'n_lit'
    }))
tmp = tmp.assign(tt=lambda x: x.tt.map(di_tt),
                 notes=lambda x: x.notes.map(di_notes),
                 share=lambda x: x.n / x.n_lit)

gg_acc_notes = (
    pn.ggplot(tmp, pn.aes(x='notes', y='share', fill='tt')) + pn.theme_bw() +
    pn.scale_y_continuous(labels=percent_format(), limits=[0, 0.1]) +
    pn.scale_fill_discrete(name='Literature') +
    pn.geom_col(color='black', position=pn.position_dodge(0.5), width=0.5) +
    pn.labs(y='Percent', x='Investigation') +
    pn.theme(axis_text_x=pn.element_text(angle=45),
             axis_title_x=pn.element_blank()))
gg_acc_notes.save(os.path.join(dir_output, 'gg_acc_notes.png'),
                  width=7,
                  height=3)

print('~~~ End of 4_results_insig.py ~~~')

# Example 7

        var_ordered = df[var_col][df[val_col].sort_values(
            ascending=ascending).index.tolist()]
    df[var_col] = pd.Categorical(df[var_col],
                                 categories=list(reversed(list(var_ordered))),
                                 ordered=True)
    return (df)


skills_summary_df = sort_df(skills_summary_df, var_col="variable")
skills_summary_df["type"] = pd.Categorical(skills_summary_df["type"])

skills_summary_df["type"] = skills_summary_df["type"].cat.reorder_categories(
    ["Requirements", "Assets"])

(p9.ggplot(skills_summary_df, p9.aes('attribute', 'value', fill='variable')) +
 p9.geom_col() + p9.coord_flip() + p9.scale_fill_discrete(guide=False))
#skills_summary_df["type"]

#Languages
languages = ["R", "sql", "python", "java", "scala", "C", "sas"]

lang_clean = {
    "sql": "SQL",
    "python": "Python",
    "R": "R",
    "java": "Java",
    "scala": "Scala",
    "C": "C",
    "sas": "SAS"
}
def main():
    mpl.rc('mathtext', fontset='cm')

    warnings.filterwarnings('ignore',
                            r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
            .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
            .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8], 'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')

        # text alignment can't be specified in an aes
        + gg.geom_text(lab_aes, data=labels.iloc[:1], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.iloc[1:2], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.iloc[2:], ha='right', va='bottom')

        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')