def plot_range_comparison(self,
                              xlabel: str = '',
                              figsize: Tuple[int] = (7, 3),
                              add_text_label: bool = True,
                              **kwargs):
        """Draw a bar chart of comparison counts per category, filled by direction.

        Parameters
        ----------
        xlabel : label for the x axis.
        figsize : figure size passed to the plotnine theme.
        add_text_label : when True, add a text layer for the 'Positive' rows
            and one for the 'Negative' rows that have ``counts > 0``.
        **kwargs : forwarded unchanged to ``self.get_ranges_df``.

        Returns
        -------
        The assembled plotnine plot.
        """
        ranges = self.get_ranges_df(**kwargs)

        fig = (p9.ggplot(ranges) +
               p9.aes('cat_value', 'counts', fill='direction') +
               p9.geom_col(alpha=.8) +
               p9.theme(figure_size=figsize,
                        axis_text_x=p9.element_text(rotation=45)) +
               p9.scale_fill_manual(['#3f7f93', '#da3b46', '#4d4a4a']) +
               p9.labs(x=xlabel, y='Number of Comparisons', fill='R'))

        if not add_text_label:
            return fig

        # (direction value, y expression for the label, text colour)
        label_specs = (
            ('Positive', 'n + max(n) * .15', '#3f7f93'),
            ('Negative', 'n + max(n) * .05', '#da3b46'),
        )
        for direction, y_expr, colour in label_specs:
            labelled = ranges.loc[ranges.direction == direction].loc[
                ranges.counts > 0]
            if labelled.empty:
                # Nothing to annotate for this direction.
                continue
            fig += p9.geom_text(
                p9.aes(label='label', x='cat_value', y=y_expr),
                inherit_aes=False,
                size=9,
                data=labelled,
                color=colour)

        return fig
示例#2
0
def plot_mem(df):
    """Bar chart of ``pcnt`` per column name, annotated with the ``size`` column.

    Reads the ``col_name``, ``pcnt`` and ``size`` columns of ``df`` (the input
    is copied, not modified). Bars taller than 30% of the tallest bar get a
    white label placed just below the bar top; the remaining bars get a gray
    label placed just above it.

    Returns the plotnine plot.
    """
    frame = df.copy()

    # String names turned into an ordered categorical whose categories are the
    # names themselves, in row order.
    frame['new_cols'] = list(map(str, frame['col_name']))
    frame['new_cols'] = pd.Categorical(frame['new_cols'],
                                       categories=frame['new_cols'],
                                       ordered=True)

    # Vertical anchor positions for the text, offset from the bar top by 1/70
    # of the largest value.
    label_gap = np.max(frame.pcnt.values) / 70
    frame['cnt_print_loc_pos'] = frame.pcnt.values + label_gap
    frame['cnt_print_loc_neg'] = frame.pcnt.values - label_gap

    bars = (
        p9.ggplot(frame, p9.aes(x='new_cols', y='pcnt', fill='new_cols'))
        + p9.geom_bar(stat='identity')
        + p9.guides(fill=False)
        + p9.ylab('% of total size')
        + p9.xlab('')
        + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1))
    )

    tall_cutoff = 0.3 * np.max(frame.pcnt)
    tall = frame.copy()[frame.pcnt > tall_cutoff]
    short = frame.copy()[frame.pcnt <= tall_cutoff]

    # Labels inside the tall bars.
    inside_text = p9.geom_text(
        p9.aes(x='new_cols', y='cnt_print_loc_neg', label='size',
               fill='col_name'),
        inherit_aes=False, data=tall, color='white', angle=90, vjust='top')
    # Labels above the short bars.
    above_text = p9.geom_text(
        p9.aes(x='new_cols', y='cnt_print_loc_pos', label='size',
               fill='col_name'),
        inherit_aes=False, data=short, color='gray', angle=90,
        vjust='bottom')

    return bars + inside_text + above_text
示例#3
0
def test_text_aesthetics():
    """Stack several geom_text layers, each exercising a different aesthetic.

    Uses the module-level ``df`` and ``n``. The final comparison of the plot
    with a string is presumably resolved by the test suite's plot-comparison
    machinery -- confirm against the test helpers.
    """
    text_layers = [
        geom_text(aes('x', label='label'), size=15, ha='left'),
        geom_text(aes('x+1', angle='angle'), size=15, va='top',
                  show_legend=False),
        geom_text(aes('x+2', label='label', alpha='z'), size=15,
                  show_legend=False),
        geom_text(aes('x+3', color='factor(z)'), size=15, show_legend=False),
        geom_text(aes('x+5', size='z'), ha='right', show_legend=False),
    ]

    p = ggplot(df, aes(y='y', label='label'))
    for layer in text_layers:
        p = p + layer
    p = p + scale_size_continuous(range=(12, 30))
    p = p + scale_y_continuous(limits=(-0.5, n - 0.5))

    assert p == 'text_aesthetics'
示例#4
0
def test_text_aesthetics():
    """Check a plot combining geom_text layers with varied aesthetics.

    Relies on the module-level ``df`` and ``n``. Comparing the plot to the
    string 'text_aesthetics' is presumably handled by the test suite's
    plot-comparison hook -- confirm.
    """
    base = ggplot(df, aes(y='y', label='label'))

    left_aligned = geom_text(aes('x', label='label'), size=15, ha='left')
    rotated = geom_text(
        aes('x+1', angle='angle'), size=15, va='top', show_legend=False)
    faded = geom_text(
        aes('x+2', label='label', alpha='z'), size=15, show_legend=False)
    coloured = geom_text(
        aes('x+3', color='factor(z)'), size=15, show_legend=False)
    sized = geom_text(aes('x+5', size='z'), ha='right', show_legend=False)

    size_scale = scale_size_continuous(range=(12, 30))
    y_scale = scale_y_continuous(limits=(-0.5, n-0.5))

    p = (base + left_aligned + rotated + faded + coloured + sized +
         size_scale + y_scale)

    assert p == 'text_aesthetics'
def test_stat_count_float():
    """Bar chart with float weights, labelled with the computed count.

    The final comparison uses the module-level ``_theme``; matching a plot
    against a string is presumably done by the test suite's comparison hook.
    """
    weighted = pd.DataFrame({'x': ['a', 'b'], 'weight': [1.5, 2.5]})

    mapping = aes(x='x', weight='weight', fill='x')
    count_labels = geom_text(aes(label=after_stat('count')), stat='count')
    p = ggplot(weighted) + mapping + geom_bar() + count_labels

    assert p + _theme == 'stat-count-float'
示例#6
0
def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Scatter a method's test statistic against the percentile of its ranking.

    The statistic column is looked up as
    ``method_stats_dict[method_name] + " (Real)"`` and the percentile is the
    percentile rank of the "Rank (simulated)" column. Rows whose percentile
    exceeds 0.9 are over-plotted in red and labelled with their index value.

    The figure is printed and then saved to ``output_figure_filename`` as SVG.
    Nothing is returned.
    """
    stat_column = method_stats_dict[method_name] + " (Real)"
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[stat_column].values,
            "Percentile rank": summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    # Only the highest-percentile rows get a visible label; the rest get "".
    point_labels = []
    for name in plot_df.index:
        is_top = plot_df.loc[name, "Percentile rank"] > 0.9
        point_labels.append(name if is_top else "")

    def sans(size):
        # Fresh element per theme entry, all in the same font family.
        return pn.element_text(family="sans-serif", size=size)

    fig = pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
    fig = fig + pn.geom_point()
    fig = fig + pn.geom_point(
        plot_df[plot_df["Percentile rank"] > 0.9],
        pn.aes(x="Test statistic", y="Percentile rank"),
        color="red",
    )
    fig = fig + pn.geom_text(
        pn.aes(label=point_labels), ha="left", va="top", size=5
    )
    fig = fig + pn.labs(
        x=x_label,
        y="Percentile of ranking",
        title=f"{method_name} pathway statistics vs ranking",
    )
    fig = fig + pn.theme_bw()
    fig = fig + pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=sans(15),
        legend_text=sans(12),
        plot_title=sans(15),
        axis_text=sans(12),
        axis_title=sans(15),
    )

    print(fig)

    # Write the figure to disk.
    save_options = {
        "format": "svg",
        "bbox_inches": "tight",
        "transparent": True,
        "pad_inches": 0,
        "dpi": 300,
    }
    fig.save(output_figure_filename, **save_options)
示例#7
0
 def getErrorPlot(self, msg="Error Occured"):
     """Build a plot whose only content is ``msg`` drawn as white text.

     Axis text and both major and minor grid lines are blanked; the project
     theme objects ``THEME.cat_colors_lines`` and ``THEME.mt`` are applied.

     parameters:
     - msg: the text to draw
     """
     message_frame = DataFrame({"x": [10], "y": [2], "label": [msg]})
     text_plot = ggplot(message_frame, aes(x="x", y="y", label="label"))
     text_plot = text_plot + geom_text(color="white")
     themed = text_plot + THEME.cat_colors_lines + THEME.mt
     return themed + theme(
         figure_size=(20, 4),
         axis_text=element_blank(),
         panel_grid_major=element_blank(),
         panel_grid_minor=element_blank(),
     )
示例#8
0
def test_stat_count_int():
    """Bar chart with integer weights, labelled with the computed count.

    The final comparison uses the module-level ``_theme``; matching a plot
    against a string is presumably done by the test suite's comparison hook.
    """
    weighted = pd.DataFrame({'x': ['a', 'b'], 'weight': [1, 2]})

    mapping = aes(x='x', weight='weight', fill='x')
    count_labels = geom_text(aes(label='stat(count)'), stat='count')
    p = ggplot(weighted) + mapping + geom_bar() + count_labels

    assert p + _theme == 'stat-count-int'
示例#9
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """Density histograms of ``x`` per Method, faceted by Task, with mean markers.

    For every (Task, Method) pair the mean is drawn as a dashed vertical
    segment and printed as a one-decimal number above it.

    Parameters
    ----------
    len_df : data frame with at least the ``Task``, ``Method`` and ``x`` columns.
    legend_position, legend_box : passed straight to the plot theme.

    Returns
    -------
    The plotnine plot.
    """
    means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # Constant column used only so the segment layer gets a legend entry.
    means[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_text = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False)
    mean_marker = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        means,
        inherit_aes=False,
        color='black')

    chart = ggplot(len_df) + aes(x='x', fill='Method', y='..density..')
    chart = chart + histogram + mean_text + mean_marker
    chart = chart + scale_linetype_manual(['dashed'])
    chart = chart + facet_wrap('Task') + xlim(0, 20) + ylim(0, .23)
    chart = chart + xlab('Example Length') + ylab('Frequency')
    chart = chart + scale_color_manual(values=COLORS)
    chart = chart + scale_fill_manual(values=COLORS)
    chart = chart + theme_fs()
    chart = chart + theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    return chart
示例#10
0
def plot_vs_discrete(data_table,
                     discrete_metric_name,
                     metric_name,
                     segment_name,
                     title,
                     ylim=None,
                     aggregate="mean"
                     ):
    """Dodged bar chart of an aggregated metric per discrete value and segment.

    Rows where either the metric or the discrete metric is null are dropped,
    the metric is cast to float, aggregated per (discrete value, segment) and
    rounded to 3 decimals. Each bar is labelled with its value.

    Parameters
    ----------
    data_table : source data frame.
    discrete_metric_name : column shown on the x axis.
    metric_name : column that is aggregated and shown on the y axis.
    segment_name : column used for the bar fill / dodge groups.
    title : plot title.
    ylim : optional y-axis limits handed to ``plot.ylim``. ``None`` or a
        scalar NaN means "no explicit limits".
    aggregate : aggregation passed to ``DataFrame.agg`` (default "mean").

    Returns
    -------
    The plotnine plot.
    """
    has_values = (pd.notnull(data_table[metric_name])
                  & pd.notnull(data_table[discrete_metric_name]))
    # Explicit copy: the dtype conversion below must write to an independent
    # frame, not to a slice of the caller's data.
    data_filtered = data_table.loc[
        has_values, [discrete_metric_name, metric_name, segment_name]].copy()

    data_filtered[metric_name] = data_filtered[metric_name].astype(float)
    result = (data_filtered
              .groupby([discrete_metric_name, segment_name])
              .agg({metric_name: aggregate})
              .reset_index())
    result[metric_name] = result[metric_name].round(3)

    gg_result = plot.ggplot(result) + plot.aes(x=discrete_metric_name,
                                               y=metric_name,
                                               fill=segment_name,
                                               label=metric_name
                                               ) + \
                plot.geom_bar(stat="identity", position="dodge") + \
                plot.geom_text(position=plot.position_dodge(width=.9), size=8) + \
                plot.labs(x=discrete_metric_name, y=aggregate + "(" + metric_name + ")", title=title)

    # pd.notnull() on a list returns an array, whose truth value is ambiguous,
    # so only apply the null test to scalars.
    ylim_missing = ylim is None or (pd.api.types.is_scalar(ylim)
                                    and pd.isnull(ylim))
    if not ylim_missing:
        gg_result = gg_result + plot.ylim(ylim)

    return gg_result
示例#11
0
文件: plots.py 项目: mappin/asxtrade
def make_sentiment_plot(sentiment_df, exclude_zero_bin=True, plot_text_labels=True):
    """Tile plot of how many entries fall in each bin on each date.

    Every column of ``sentiment_df`` whose name starts with "bin_" is counted;
    the rest of the column name is parsed as a ``%Y-%m-%d`` date. With
    ``exclude_zero_bin`` set, the "0.0" bin and any non-string bin value are
    left out. With ``plot_text_labels`` set, each tile also shows its count.

    Returns whatever ``plot_as_inline_html_data`` returns for the plot.
    """
    print(
        "Sentiment plot: exclude zero bins? {} show text? {}".format(
            exclude_zero_bin, plot_text_labels
        )
    )

    records = []
    bin_columns = [name for name in sentiment_df.columns if name.startswith("bin_")]
    for column in bin_columns:
        day = column[4:]  # strip the "bin_" prefix
        for raw_bin, occurrences in Counter(sentiment_df[column]).items():
            if exclude_zero_bin:
                if not isinstance(raw_bin, str) or raw_bin == "0.0":
                    continue
            records.append(
                {
                    "date": datetime.strptime(day, "%Y-%m-%d"),
                    "bin": str(raw_bin),
                    "value": int(occurrences),
                }
            )

    df = pd.DataFrame.from_records(records)

    # HACK/TODO: hard-coded bin order; should come from price_change_bins().
    negative_bins = ["-1000.0", "-100.0", "-10.0", "-5.0", "-3.0", "-2.0", "-1.0", "-1e-06"]
    positive_bins = ["1e-06", "1.0", "2.0", "3.0", "5.0", "10.0", "25.0", "100.0", "1000.0"]
    df["bin_ordered"] = pd.Categorical(df["bin"], categories=negative_bins + positive_bins)

    plot = p9.ggplot(df, p9.aes("date", "bin_ordered", fill="value"))
    plot = plot + p9.geom_tile(show_legend=False)
    plot = plot + p9.theme_bw()
    plot = plot + p9.xlab("") + p9.ylab("Percentage daily change")
    plot = plot + p9.theme(
        axis_text_x=p9.element_text(angle=30, size=7), figure_size=(10, 5)
    )
    if plot_text_labels:
        plot = plot + p9.geom_text(p9.aes(label="value"), size=8, color="white")
    return plot_as_inline_html_data(plot)
示例#12
0
def plot_train_test(ags):
    """Plot test-time compute against train-time compute, one line per elo.

    ``data.train_test`` and ``data.train_test_model`` supply the frontier
    table and a fitted model. Solid lines show ``test_flops``, dashed lines
    show ``test_flops_hat``; each elo group is labelled once, and the fitted
    equation is annotated on the plot. Both axes are log10-scaled.

    Returns the plotnine plot.
    """
    frontiers = data.train_test(ags)
    frontiers, model = data.train_test_model(frontiers)

    # One label row per elo, taken after sorting by train_flops.
    by_flops = frontiers.sort_values('train_flops')
    labs = by_flops.groupby('elo').first().reset_index()
    desc = f'log₁₀(test) = {model.params[1]:.1f} · log₁₀(train) + {model.params[2]:.1g} · elo + {model.params[0]:.0f}'

    base = pn.ggplot(
        frontiers,
        pn.aes(x='train_flops', y='test_flops', color='elo', group='elo'))
    observed = pn.geom_line(size=.5, show_legend=False)
    fitted = pn.geom_line(pn.aes(y='test_flops_hat'),
                          size=.25,
                          show_legend=False,
                          linetype='dashed')
    elo_text = pn.geom_text(pn.aes(label='elo.astype(int)'),
                            labs,
                            show_legend=False,
                            size=6,
                            nudge_y=+.2)
    equation = pn.annotate(
        'text', 1.5e13, 5e9, label=desc, ha='left', size=6, family='serif')
    axis_titles = pn.labs(x='Train-time compute (FLOPS-seconds)',
                          y='Test-time compute (FLOPS-seconds)')

    return (base + observed + fitted + elo_text +
            pn.scale_color_cmap(limits=(-1500, 0)) +
            pn.scale_x_continuous(trans='log10') +
            pn.scale_y_continuous(trans='log10') +
            equation + axis_titles + plot.IEEE())
示例#13
0
def summary(tags, opts=None):
    """Count annotations per (tag, background) and save a dodged bar chart.

    Prints the input table and the aggregated counts, then writes the chart
    to "tag_species_bg.png" in the current working directory.

    Parameters
    ----------
    tags : data frame with at least the ``tag`` and ``background`` columns.
    opts : accepted for interface compatibility; not used here.

    Returns
    -------
    None. The saved image file is the output.
    """
    print(tags)
    tags_summary = (
        tags.groupby(["tag", "background"])
        .agg({"tag": "count"})
        .rename(columns={"tag": "n_tags"})
        .reset_index()
        .astype({"background": "category", "tag": "category"})
    )
    print(tags_summary)

    plot = (
        ggplot(
            data=tags_summary,
            mapping=aes(x="tag", y="n_tags", fill="background"),
        )
        + geom_bar(stat="identity", show_legend=True, position=position_dodge())
        + xlab("Species")
        + ylab("Number of annotations")
        + geom_text(mapping=aes(label="n_tags"), nudge_y=15)
        + theme_classic()
        + theme(axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}))
    )
    # The original bound the result of .save() to a variable and printed it,
    # which printed the save call's return value rather than the plot. Keep
    # the plot and the save step separate and drop that print.
    plot.save("tag_species_bg.png", width=10, height=8)
示例#14
0
def p(N=3):
    """Yield *N* distinct plot objects, titled '1 of N' through 'N of N'."""
    base = ggplot(aes(x='wt', y='mpg', label='name'), data=mtcars) + geom_text()
    yield from (
        base + ggtitle('%d of %d' % (number, N))
        for number in range(1, N+1)
    )
示例#15
0
def plot_revigo(
    rev,
    outline=2,
    expand_points=(1.05, 1.2),
    figure_size=(8, 8),
    font_size=8,
    point_size=3,
    point_alpha=0.7,
    palette='RdPu',
    dispensability_cutoff=1.,
    show_all_labels=False,
    text_column='name',
    term_size_limit=None,
):
    """Scatter plot of a REVIGO-style table with text labels on selected terms.

    Points are placed at (``plot_X``, ``plot_Y``), filled by ``neglog10`` and
    sized by ``frequency`` (log10 size scale). Labels get a white outline so
    they stay readable on top of the points.

    Parameters
    ----------
    rev : data frame with the columns ``plot_X``, ``plot_Y``, ``neglog10``,
        ``frequency`` and ``text_column``; also ``eliminated``,
        ``dispensability`` (and ``term_size`` when ``term_size_limit`` is
        given) unless ``show_all_labels`` is True.
    outline : line width of the white stroke drawn around label text.
    expand_points : forwarded to the ``adjust_text`` options.
    figure_size : figure size for the theme.
    font_size : label text size.
    point_size : currently unused; point size is mapped from ``frequency``.
    point_alpha : point transparency.
    palette : name of the sequential fill palette.
    dispensability_cutoff : only terms with dispensability below this value
        are labelled.
    show_all_labels : label every row instead of filtering.
    text_column : column providing the label text.
    term_size_limit : if given, only terms with ``term_size`` below it are
        labelled.

    Returns
    -------
    The plotnine plot.
    """

    import plotnine as p9
    import matplotlib.patheffects as path_effects

    # The stroke width comes from ``outline`` (previously hard-coded to 2,
    # which made the parameter a no-op).
    pe = [
        path_effects.Stroke(linewidth=outline, foreground='white'),
        path_effects.Normal()
    ]

    if show_all_labels:
        lbl_df = rev
    else:
        lbl_df = rev[(rev.eliminated == 0)
                     & (rev.dispensability < dispensability_cutoff)]
        if term_size_limit is not None:
            lbl_df = lbl_df[lbl_df.term_size < term_size_limit]

    g = (p9.ggplot(p9.aes(x='plot_X', y='plot_Y'), data=rev) +
         p9.geom_point(p9.aes(fill='neglog10', size='frequency'),
                       color='black',
                       alpha=point_alpha) +
         p9.geom_text(p9.aes(label=text_column),
                      data=lbl_df,
                      size=font_size,
                      adjust_text={
                          'expand_points': expand_points,
                          'arrowprops': {
                              'arrowstyle': '-'
                          },
                          # All point positions are handed over, not only the
                          # labelled subset.
                          'x': rev.plot_X.values,
                          'y': rev.plot_Y.values
                      },
                      path_effects=pe) + p9.theme_bw() +
         p9.scale_fill_distiller(type='seq', palette=palette, direction=1) +
         p9.labs(x='Semantic similarity space',
                 y='',
                 fill='-log10(adj. p-value)',
                 size='Term frequency') +
         p9.scale_size_continuous(range=(2, 7), trans='log10') +
         p9.theme(figure_size=figure_size,
                  axis_text_x=p9.element_blank(),
                  axis_text_y=p9.element_blank(),
                  axis_ticks=p9.element_blank()))

    return g
示例#16
0
def test_dodge_preserve_single_text():
    """Bars and count labels sharing one ``position_dodge(preserve='single')``.

    The final comparison uses the module-level ``_theme``; matching a plot
    against a string is presumably done by the test suite's comparison hook.
    """
    data = pd.DataFrame({'x': ['a', 'b', 'b', 'b'], 'y': ['a', 'a', 'b', 'b']})

    # The same position object is used for both layers.
    dodge = position_dodge(preserve='single', width=0.9)
    bars = geom_bar(position=dodge)
    count_text = geom_text(
        aes(y=after_stat('count'), label=after_stat('count')),
        stat='count',
        position=dodge,
        va='bottom')

    p = ggplot(data, aes('x', fill='y')) + bars + count_text
    assert p + _theme == 'dodge_preserve_single_text'
示例#17
0
def plotfreq(freqdf):
    '''
    Horizontal bar chart of frequencies, labelled with count and percentage.

    Parameters
    ----------
    freqdf  dataframe generated by freq(): exactly four columns, the last
            three named 'freq', 'perc' and 'cump', and at most 100 rows

    Returns
    -------
    The plotnine bar chart, ordered by frequency. If the input fails
    validation, a message is printed and None is returned.

    Example
    -------
    import exploretransform as et
    df, X, y = et.loadboston()
    et.plotfreq(et.freq(X['town']))

    Warning
    -------
    Inputs with more than 100 rows are rejected rather than plotted.
    '''

    # --- input validation: print a message and bail out with None ---
    if not isinstance(freqdf, pd.core.frame.DataFrame):
        print("\nFunction only accetps dataframes\n")
        return None

    if len(freqdf.columns) != 4:
        print("\nInput must be a dataframe generated by freq()\n")
        return None

    expected_tail = ['freq', 'perc', 'cump']
    if not (freqdf.columns[1:4] == expected_tail).all():
        print("\nInput must be a dataframe generated by freq()\n")
        return None

    if len(freqdf) > 100:
        print("\nUnable to plot more than 100 items")
        return None

    category = freqdf.columns[0]

    # Text shown next to each bar: "<freq>   [ <perc>% ]".
    percent_text = '[ ' + freqdf['perc'].astype(str) + '%' + ' ]'
    lbl = freqdf['freq'].astype(str).str.cat(percent_text, sep='   ')

    # x aesthetic expression: categories reordered by frequency.
    aesx = 'reorder(' + category + ', freq)'

    plot = pn.ggplot(freqdf)
    plot = plot + pn.aes(x=aesx, y='freq', fill='freq', label=lbl)
    plot = plot + pn.geom_bar(stat='identity') + pn.coord_flip()
    plot = plot + pn.theme(axis_text_y=pn.element_text(size=6, weight='bold'),
                           legend_position='none')
    plot = plot + pn.labs(x=category, y="Freq")
    plot = plot + pn.scale_fill_gradient2(mid='bisque', high='blue')
    plot = plot + pn.geom_text(size=6, nudge_y=.7)

    return plot
示例#18
0
def plot_optimal_model_size(ags):
    """Plot the parameter count of the best model against training compute.

    For each board size, ten compute thresholds are spaced evenly in log10
    between the smallest and largest ``train_flops``. At each threshold the
    parameter count (``width**2 * depth``) of the highest-elo run within that
    budget is recorded. A log-log OLS fit across all board sizes is drawn as
    a dashed line and its equation is annotated on the plot.

    Parameters
    ----------
    ags : data frame with the columns ``boardsize``, ``elo``, ``width``,
        ``depth`` and ``train_flops``.

    Returns
    -------
    The plotnine plot.
    """
    from statsmodels.formula import api as smf

    results = {}
    for b, g in ags.groupby('boardsize'):
        ordered = g.sort_values('elo').copy()
        ordered['params'] = g.width**2 * g.depth

        left = np.log10(g.train_flops.min())
        right = np.log10(g.train_flops.max())
        # Skip the first grid point (the minimum itself).
        for f in np.linspace(left, right, 11)[1:]:
            subset = ordered[ordered.train_flops <= 10**f]
            # Rows are sorted by elo, so the last one is the best in budget.
            results[b, 10**f] = subset.params.iloc[-1]
    df = pd.Series(results).reset_index()
    df.columns = ['boardsize', 'approx_flops', 'params']

    model = smf.ols('np.log10(params) ~ np.log10(approx_flops) + 1', df).fit()

    left, right = np.log10(df.approx_flops.min()), np.log10(
        df.approx_flops.max())
    preds = pd.DataFrame({'approx_flops': 10**np.linspace(left, right, 21)})
    preds['params'] = 10**model.predict(preds)

    # Label position per board size: x at its largest compute value, y at the
    # largest params value of any board size within that compute.
    labs = df.sort_values('approx_flops').groupby(
        'boardsize').last().reset_index()
    labs['params'] = labs.apply(
        lambda r: df[df.approx_flops <= r.approx_flops].params.max(), axis=1)

    points = df.sort_values('approx_flops').groupby(
        'boardsize').last().reset_index()

    # model.params is a label-indexed Series; use .iloc for positional access
    # (intercept first, slope second) rather than integer keys through [].
    intercept = model.params.iloc[0]
    slope = model.params.iloc[1]
    desc = f'log₁₀(params) = {slope:.2f} · log₁₀(compute) − {-intercept:.1f}'

    return (
        pn.ggplot(df, pn.aes(x='approx_flops', y='params')) +
        pn.geom_line(pn.aes(color='factor(boardsize)', group='boardsize'),
                     show_legend=False) +
        pn.geom_line(data=preds, linetype='dashed', size=.25) +
        pn.geom_point(pn.aes(color='factor(boardsize)', group='boardsize'),
                      data=points,
                      size=.5,
                      show_legend=False) +
        pn.geom_text(pn.aes(
            color='factor(boardsize)', group='boardsize', label='boardsize'),
                     data=labs,
                     nudge_y=+.5,
                     show_legend=False,
                     size=6) +
        pn.annotate(
            'text',
            1e9, 2e7, label=desc, ha='left', size=6, family='serif') +
        pn.scale_x_continuous(trans='log10') +
        pn.scale_y_continuous(trans='log10') + pn.scale_color_hue(l=.4) +
        pn.labs(x='Train-time compute (FLOPS-seconds)',
                y='Optimal model size (params)') + plot.IEEE())
示例#19
0
def plot_test(ags):
    """Plot elo against test-time compute for a few selected 9x9 runs.

    Only ``boardsize == 9`` rows at each run's maximum ``idx`` are used. Four
    runs are selected: for each of four evenly spaced thresholds between
    -2000 and -500, the run with the lowest test compute (at 64 test nodes)
    whose ``ELO * elo`` exceeds the threshold. Each point is labelled with
    its ``test_nodes`` and each run with its "depth×width" architecture.

    Parameters
    ----------
    ags : data frame with the columns ``boardsize``, ``run``, ``idx``,
        ``test_nodes``, ``train_flops``, ``samples``, ``elo``, ``width`` and
        ``depth``.

    Returns
    -------
    The plotnine plot.
    """
    df = ags.query('boardsize == 9').groupby('run').apply(
        lambda run_rows: run_rows[run_rows.idx == run_rows.idx.max()]).copy()
    df['test_flops'] = df.test_nodes * (df.train_flops / df.samples)

    subset = df.query('test_nodes == 64').sort_values('test_flops')
    selection = [
        subset.loc[ELO * subset.elo > e].iloc[0].run
        for e in np.linspace(-2000, -500, 4)
    ]

    df = df[df.run.isin(selection)].copy()

    df['params'] = df.width**2 * df.depth
    df['arch'] = df.apply(lambda r: '{depth}×{width}'.format(**r), axis=1)
    # One architecture label per run, taken after sorting by test_flops.
    labels = df.sort_values('test_flops').reset_index(
        drop=True).groupby('run').first().reset_index()

    # The test_nodes text layer is added once; the original added the same
    # layer twice, drawing every label on top of itself.
    node_text = pn.geom_text(pn.aes(label='test_nodes'),
                             nudge_y=-50,
                             show_legend=False,
                             size=4,
                             va='top')
    arch_text = pn.geom_text(pn.aes(label='arch'),
                             data=labels,
                             show_legend=False,
                             size=6,
                             nudge_x=-.1,
                             ha='right')

    return (pn.ggplot(
        df, pn.aes(x='test_flops', y='ELO*elo', color='params', group='run')) +
            pn.geom_point(size=.25, show_legend=False) +
            pn.geom_line(size=.5, show_legend=False) +
            node_text +
            arch_text +
            pn.scale_x_continuous(trans='log10') +
            pn.scale_color_cmap('plasma',
                                trans='log10',
                                limits=(df.params.min(), 10 * df.params.max()))
            + pn.coord_cartesian((3.5, None)) +
            pn.labs(x='Test-time compute (FLOPS-seconds)',
                    y='Elo v. perfect play') + plot.IEEE())
 def create(self, file_path: str) -> None:
     """Render the "Case Study Sizes" bar chart and save it to ``file_path``.

     Bars count the rows of ``self._data`` per ``count`` value; each bar is
     labelled with its count. The x axis is restricted to a fixed list of
     sizes.
     """
     size_levels = [
         "1", "2", "3", "5", "26", "52", "97", "100", "300", "537"
     ]

     chart = ggplot(self._data, aes(x="count", label="..count.."))
     chart = chart + geom_bar(fill="#1e4f79")
     chart = chart + geom_text(stat="count", va='bottom', size=24)
     chart = chart + scale_x_discrete(limits=size_levels)
     chart = chart + scale_y_continuous(breaks=[0, 5, 10], limits=[0, 10])
     chart = chart + ggtitle("Case Study Sizes")
     chart = chart + xlab("Number of Projects")
     chart = chart + ylab("Number of Case Studies")
     chart = chart + theme_classic(base_size=28, base_family="Helvetica")
     chart = chart + theme(text=element_text(size=28))

     chart.save(file_path, width=14, height=7)
示例#21
0
def test_stack_negative():
    """Stacked columns with centred labels when some y values are negative.

    Works on a copy of the module-level ``df1`` with the first and last ``y``
    values negated. The final comparison uses the module-level ``_theme``.
    """
    df = df1.copy()
    y_position = df.columns.get_loc('y')
    # Flip the sign of the first and the last row.
    for row in (0, len(df) - 1):
        df.iloc[row, y_position] *= -1

    columns = geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                       position='stack')
    centred_text = geom_text(aes('factor(x)', 'y', label='y'),
                             position=position_stack(vjust=0.5))
    p = ggplot(df) + columns + centred_text

    assert p + _theme == 'stack-negative'
 def create(self, file_path: str) -> None:
     """Render the "Design Pattern Counts" bar chart and save it to ``file_path``.

     Bars show ``count`` per ``pattern`` from ``self._data``; each bar is
     labelled with the ``fraction`` column formatted as a percentage. The x
     axis follows the order of the ``pattern`` column.
     """
     chart = ggplot(self._data, aes(x="pattern", y="count", label="fraction"))
     chart = chart + geom_bar(stat="identity", fill="#1e4f79")
     chart = chart + geom_text(va='bottom', size=24, format_string='{:.1%}')
     chart = chart + scale_x_discrete(limits=self._data["pattern"])
     chart = chart + scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
     chart = chart + ggtitle("Design Pattern Counts")
     chart = chart + xlab("Design Pattern")
     chart = chart + ylab("Count")
     chart = chart + theme_classic(base_size=32, base_family="Helvetica")
     chart = chart + theme(
         text=element_text(size=32),
         axis_text_x=element_text(rotation=45, ha="right"))

     chart.save(file_path, width=24, height=8)
def plot_ambient_by_difference(adata, plot_name='cellbender_results'):
    """Plot per-gene counts removed by cellbender against the ambient signature.

    Adds three columns to ``adata.var``: the per-gene totals of the
    ``counts_raw`` and ``counts_cellbender`` layers, and their difference.
    Two PNG files are then written, prefixed with ``plot_name``: a scatter
    plot, and the same plot with gene symbols drawn in place of points.

    Returns None.
    """
    difference_column = 'difference_total_gene_counts_raw_cellbender'

    def _gene_totals(layer_name):
        # Sum over axis 0 of the layer, flattened to one value per gene.
        return np.array(adata.layers[layer_name].sum(axis=0)).squeeze()

    adata.var['total_gene_counts_raw'] = _gene_totals('counts_raw')
    adata.var['total_gene_counts_cellbender'] = _gene_totals(
        'counts_cellbender')
    adata.var[difference_column] = (
        adata.var['total_gene_counts_raw']
        - adata.var['total_gene_counts_cellbender'])

    def _save_plot(make_layer, filename_template):
        # Both output files share everything except the geom layer.
        gplt = plt9.ggplot(adata.var)
        gplt = gplt + plt9.theme_bw()
        gplt = gplt + make_layer()
        gplt = gplt + plt9.labs(
            x='Ambient RNA signature',
            y='Counts removed by cellbender',
            title='Ambient RNA signature removal per gene')
        gplt.save(filename_template.format(plot_name), width=5, height=5)

    # Scatter of points.
    _save_plot(
        lambda: plt9.geom_point(
            plt9.aes(x='ambient_expression', y=difference_column),
            alpha=0.25),
        '{}-ambient_signature-scatter.png')

    # Same axes, with gene names as the markers.
    _save_plot(
        lambda: plt9.geom_text(
            plt9.aes(x='ambient_expression',
                     y=difference_column,
                     label='gene_symbols'),
            alpha=0.25),
        '{}-ambient_signature-scatter_genenames.png')
 def create(self, file_path: str) -> None:
     """Render the "Classes per Category" bar chart and save it to ``file_path``.

     Bars show ``count`` per ``category`` from ``self._data``; each bar is
     labelled with the ``percent`` column. The x axis follows the order of
     the ``category`` column.
     """
     chart = ggplot(self._data, aes(x="category", y="count", label="percent"))
     chart = chart + geom_bar(stat="identity", fill="#1e4f79")
     chart = chart + geom_text(va='bottom', size=24)
     chart = chart + scale_x_discrete(limits=self._data["category"])
     chart = chart + scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
     chart = chart + ggtitle("Classes per Category")
     chart = chart + xlab("Category")
     chart = chart + ylab("Number of Classes")
     chart = chart + theme_classic(base_size=32, base_family="Helvetica")
     chart = chart + theme(
         text=element_text(size=32),
         axis_text_x=element_text(rotation=45, ha="right"))

     chart.save(file_path, width=7, height=7)
示例#25
0
def test_stack_negative():
    """Stacked columns plus mid-stack labels with negative first/last values.

    Operates on a copy of the module-level ``df1``; the final comparison uses
    the module-level ``_theme``.
    """
    df = df1.copy()
    y_col = df.columns.get_loc('y')
    last_row = len(df)-1
    df.iloc[0, y_col] *= -1
    df.iloc[last_row, y_col] *= -1

    p = ggplot(df)
    p = p + geom_col(aes('factor(x)', 'y', fill='factor(y)'),
                     position='stack')
    p = p + geom_text(aes('factor(x)', 'y', label='y'),
                      position=position_stack(vjust=0.5))

    assert p + _theme == 'stack-negative'
示例#26
0
    def getErrorPlot(self, msg="Error Occured"):
        """
        Build a plotnine plot that shows only an error message, for use as a
        placeholder wherever a dashboard plot could not be produced.

        parameters:
        - msg: the message to display
        """
        message_frame = DataFrame({"x": [10], "y": [2], "label": [msg]})
        text_plot = ggplot(message_frame, aes(x="x", y="y", label="label"))
        text_plot = text_plot + geom_text(color="Black")
        themed = text_plot + THEME.cat_colors_lines + THEME.mt
        # Hide axis text and grid lines so only the message remains.
        return themed + theme(
            figure_size=(20, 4),
            axis_text=element_blank(),
            panel_grid_major=element_blank(),
            panel_grid_minor=element_blank(),
        )
示例#27
0
def plot_result_stats(results, title):
    """Dodged bar chart of ``results.describe()`` statistics per metric.

    The "count", "min" and "max" rows of the description are dropped; the
    remaining statistics are drawn as one bar per (metric, statistic) and
    labelled with the value rounded to 2 decimals. The y axis is limited to
    [0, 1.0].

    Returns the plotnine plot.
    """
    long_form = results.describe().unstack().reset_index()
    stats = long_form.rename(columns={
        "level_0": "metric",
        "level_1": "group",
        0: "value"
    })

    excluded_groups = ["count", "min", "max"]
    stats = stats[~stats["group"].isin(excluded_groups)]
    stats["value_presentation"] = stats["value"].round(2)

    value_text = p9.geom_text(p9.aes(label="value_presentation"),
                              position=p9.position_dodge(width=0.9),
                              va="bottom")
    plot = p9.ggplot(stats) + p9.aes("metric", "value", fill="group")
    plot = plot + p9.geom_col(position="dodge") + p9.theme_bw()
    plot = plot + p9.coord_cartesian(ylim=[0, 1.0]) + p9.ggtitle(title)
    plot = plot + value_text
    return plot
示例#28
0
def setup_heatmap0(df: pd.DataFrame, format_string, axis_text):
    """Heat map of ``df`` with tiles filled by ``scale`` and labelled by ``value``.

    Parameters
    ----------
    df : data frame with the columns ``row``, ``col``, ``scale`` and ``value``.
    format_string : format applied to the ``value`` labels.
    axis_text : truthy to show bold axis text, falsy to hide it.

    Returns
    -------
    The plotnine plot.
    """
    # Plotnine does not support changing the position of any axis, see
    # https://stackoverflow.com/a/62161556/819272
    if axis_text:
        tick_text = p9.element_text(face='bold')
    else:
        tick_text = p9.element_blank()

    tiles = p9.geom_tile(p9.aes(fill='scale'))
    cell_text = p9.geom_text(
        p9.aes(label='value'), format_string=format_string, size=7)
    fill_scale = p9.scale_fill_gradientn(
        colors=['#63BE7B', '#FFEB84', '#F8696B'],
        na_value='#CCCCCC',
        guide=False)
    look = p9.theme(axis_text=tick_text,
                    axis_ticks=p9.element_blank(),
                    axis_title=p9.element_blank(),
                    panel_grid=p9.element_blank())

    heatmap = p9.ggplot(df, p9.aes(y='row', x='col')) + p9.coord_equal()
    heatmap = heatmap + tiles + cell_text
    heatmap = heatmap + p9.scale_y_discrete(drop=False)
    heatmap = heatmap + p9.scale_x_discrete(drop=False)
    return heatmap + fill_scale + look
示例#29
0
def plot_company_rank(df):
    """Smoothed rank-over-time lines per ``asx_code``, one facet row per bin.

    Lines are coloured by ``sector`` and each code is labelled at the
    position given by the ``x`` and ``y`` columns.

    Parameters
    ----------
    df : data frame with the columns ``date``, ``rank``, ``asx_code``,
        ``sector``, ``bin``, ``x`` and ``y``.

    Returns
    -------
    Whatever ``plot_as_inline_html_data`` returns for the plot.
    """
    assert isinstance(df, pd.DataFrame)
    # One facet row per distinct bin value.
    n_bin = len(df['bin'].unique())

    code_text = p9.geom_text(p9.aes(label='asx_code', x='x', y='y'),
                             nudge_x=1.2,
                             size=6,
                             show_legend=False)
    look = p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                    figure_size=(8, 20),
                    subplots_adjust={'right': 0.8})

    plot = p9.ggplot(
        df, p9.aes('date', 'rank', group='asx_code', color='sector'))
    plot = plot + p9.geom_smooth(span=0.3, se=False)
    plot = plot + code_text + p9.xlab('')
    plot = plot + p9.facet_wrap('~bin', nrow=n_bin, ncol=1, scales="free_y")
    plot = plot + look
    return plot_as_inline_html_data(plot)
示例#30
0
def plot_vs_continuous(data_table,
                       continuous_metric_name,
                       breaks,
                       metric_name,
                       segment_name,
                       title,
                       aggregate="mean"):
    """
    Plot an aggregated metric as dodged bars with a text label per bar.

    The data comes from ``_aggregate_vs_continuous``, called with all the
    arguments below except ``title``. Its 'level_0' column is mapped to x,
    ``metric_name`` to both y and the label, and ``segment_name`` to fill.

    :param data_table: forwarded to ``_aggregate_vs_continuous``.
    :param continuous_metric_name: forwarded; also used as the x-axis label.
    :param breaks: forwarded to ``_aggregate_vs_continuous``.
    :param metric_name: column name used for bar height and label text.
    :param segment_name: column name used for the bar fill.
    :param title: plot title.
    :param aggregate: forwarded; also the prefix of the y-axis label.
    :return: the composed plot.
    """
    aggregated = _aggregate_vs_continuous(data_table, continuous_metric_name,
                                          breaks, metric_name, segment_name,
                                          aggregate)
    mapping = plot.aes(x="level_0",
                       y=metric_name,
                       fill=segment_name,
                       label=metric_name)
    # y-axis label reads e.g. "mean(<metric_name>)".
    y_axis_label = "".join([aggregate, "(", metric_name, ")"])

    return (plot.ggplot(aggregated)
            + mapping
            + plot.geom_bar(stat="identity", position="dodge")
            + plot.geom_text(position=plot.position_dodge(width=.9), size=8)
            + plot.labs(x=continuous_metric_name, y=y_axis_label, title=title))
示例#31
0
def add_mirna_g(g,df, str_name,str_start,str_end,dis_pos,l_s,l_e,l_score=[]):
    """
    Add one labelled track of horizontal ranges (with optional scores) to g.

    Side effects: ``df`` gains the columns ``str_start`` and ``str_end``
    (and 'score' + ``str_name`` when scores are given).

    :param g: plot object that the layers are added to with ``+=``.
    :param df: data frame used by the layers; colour is mapped to its
        'mi_name' column.
    :param str_name: text drawn at x=0, y=``dis_pos``; also the suffix of
        the score column name.
    :param str_start: name of the column that receives ``l_s``.
    :param str_end: name of the column that receives ``l_e``.
    :param dis_pos: y position of the track.
    :param l_s: start positions, wrapped in a ``pd.Series``.
    :param l_e: end positions, wrapped in a ``pd.Series``.
    :param l_score: optional scores; when non-empty they are drawn as text
        with no decimals at the start of each range. Never mutated here.
    :return: None.

    NOTE(review): nothing is returned, so the caller only sees the new layers
    if ``g`` implements ``+=`` in place -- confirm for the plot type in use.
    NOTE(review): the Series are assigned by index, so this presumably expects
    ``df`` to have a default 0..n-1 index -- confirm against callers.
    """
    df[str_start]= pd.Series(l_s)
    df[str_end] = pd.Series(l_e)

    # Track title, horizontal range bars, and a vertical drop line from each
    # range start down to y=0.
    g+= pt.annotate("text", x=0,y=dis_pos,label=str_name)
    g+= pt.geom_errorbarh(df,pt.aes(xmin=str_start,y=(dis_pos),xmax=str_end,color='mi_name'))
    g+= pt.geom_segment(df,pt.aes(x=str_start,y=(dis_pos),yend=0,xend=str_start,color='mi_name'))
    if(l_score):
        score_column_name = 'score'+str_name
        # The builtin float replaces the np.float alias, which current NumPy
        # releases no longer provide; the resulting dtype is the same.
        df[score_column_name] = pd.Series(l_score,dtype=float).map('{:.0f}'.format)

        g+= pt.geom_text(df, pt.aes(x=str_start,y=dis_pos,label=score_column_name,color='mi_name'),
                          nudge_x=0.1, nudge_y=0.1)#,adjust_text=adjust_text_dict)
def plot_distributions_bar_plot_grid(dataframe, figure_size=(14, 4)):
    """
    Build a grid of dodged count bar plots, one panel per 'iterations' value.

    Bars are counted per 'threshold' (x axis) and split by 'value' (fill);
    every bar carries its count as a text label.

    :param dataframe: data handed to ``p9.ggplot``.
    :param figure_size: forwarded to ``p9.theme`` as ``figure_size``.
    :return: the composed plotnine plot.
    """
    bar_colors = {0: '#009e73', 1: '#d55e00'}
    legend_names = {0: 'Stable', 1: 'Unstable'}

    def name_legend_entries(breaks):
        # Translate the raw fill values into their legend captions.
        return [legend_names[entry] for entry in breaks]

    def name_facet(iterations):
        return f'iters = {iterations}'

    # Count label drawn on top of each bar, dodged like the bars themselves.
    count_labels = p9.geom_text(p9.aes(label='stat(count)'),
                                stat='count',
                                position=p9.position_dodge(0.9),
                                size=7,
                                va='bottom')
    # Expansion order: (mul_bottom, add_bottom, mul_top, add_top).
    y_scale = p9.scale_y_continuous(name='Count', expand=(0, 0, 0, 500))
    fill_scale = p9.scale_fill_manual(values=bar_colors,
                                      labels=name_legend_entries)
    panels = p9.facet_grid('. ~ iterations',
                           labeller=p9.labeller(cols=name_facet))
    # No y-axis title, small tick fonts, untitled legend on top with a
    # reduced margin, and the requested figure size.
    look = p9.theme(axis_title_y=p9.element_blank(),
                    axis_text_x=p9.element_text(size=7),
                    axis_text_y=p9.element_text(size=7),
                    legend_title=p9.element_blank(),
                    legend_position='top',
                    legend_box_margin=2,
                    figure_size=figure_size)

    return (p9.ggplot(dataframe, p9.aes(x='threshold', fill='value'))
            + p9.geom_bar(position='dodge')
            + count_labels
            + p9.scale_x_discrete(name='Threshold')
            + y_scale
            + fill_scale
            + panels
            + look)
示例#33
0
def create_length_plot(len_df, legend_position='right', legend_box='vertical'):
    """
    Plot per-task density histograms of 'x' with a marker at each group mean.

    One panel is drawn per 'Task'; histograms are filled by 'Method'. For
    every ('Task', 'Method') group the mean of 'x' is shown as a dashed
    vertical segment with the mean value printed above it.

    :param len_df: data frame with at least 'x', 'Task' and 'Method'.
    :param legend_position: forwarded to ``theme(legend_position=...)``.
    :param legend_box: forwarded to ``theme(legend_box=...)``.
    :return: the composed plot.
    """
    group_means = len_df.groupby(['Task', 'Method']).mean().reset_index()
    # The constant ' ' column is mapped to linetype so the dashed segment
    # gets a legend entry captioned 'Mean Length'.
    group_means[' '] = 'Mean Length'

    histogram = geom_histogram(binwidth=2, position='identity', alpha=.6)
    mean_labels = geom_text(
        aes(x='x', y=.22, label='x', color='Method'),
        group_means,
        inherit_aes=False,
        format_string='{:.1f}',
        show_legend=False
    )
    mean_markers = geom_segment(
        aes(x='x', xend='x', y=0, yend=.205, linetype=' '),
        group_means,
        inherit_aes=False, color='black'
    )
    legend_theme = theme(
        aspect_ratio=1,
        legend_title=element_blank(),
        legend_position=legend_position,
        legend_box=legend_box,
    )

    length_plot = (
        ggplot(len_df)
        + aes(x='x', fill='Method', y='..density..')
        + histogram
        + mean_labels
        + mean_markers
        + scale_linetype_manual(['dashed'])
        + facet_wrap('Task')
        + xlim(0, 20) + ylim(0, .23)
        + xlab('Example Length') + ylab('Frequency')
        + scale_color_manual(values=COLORS)
        + scale_fill_manual(values=COLORS)
        + theme_fs()
        + legend_theme
    )
    return length_plot
示例#34
0
def main():
    """
    Build every figure of the series and hand each one to ``save_both``.

    Sets the matplotlib mathtext font set to 'cm', silences the warnings
    matched by the three patterns below, then builds one plot per data set
    and passes it to ``save_both`` together with a base name. The final
    figure shades the two profit regions and annotates them with text.
    """
    mpl.rc('mathtext', fontset='cm')

    warnings.filterwarnings('ignore',
                            r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
            .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
            .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    # NOTE(review): this σ plot reuses liab_Ob_Ol_free and the (1, 10) y
    # limits of the O_l plot above; it looks like a copy-paste and may need
    # a σ-specific data function and limits -- confirm.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    # NOTE(review): same concern as the free-bet σ plot -- this reuses
    # liab_Ob_Ol_qual for a plot whose y axis is σ; confirm.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    # Final figure: combine one P_f curve and one P_q curve, tagged by bet
    # type, keeping a single row per O_b value.
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # Positions of the dashed guide lines: O_b where σ == 0 on the P_f curve
    # and σ where O_b == 1 on the P_q curve (row label 0 in both results).
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8], 'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')

        # text alignment can't be specified in an aes, so each label row gets
        # its own layer. `labels` has the default 0..2 index, so these
        # label-based .loc slices (end-inclusive) pick rows 0, 1 and 2.
        # .loc replaces the DataFrame.ix indexer that pandas has removed.
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')

        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')
示例#35
0
from __future__ import absolute_import, division, print_function
import os

import matplotlib.pyplot as plt
import pytest
import six

from plotnine import ggplot, aes, geom_text, ggsave
from plotnine.data import mtcars
from plotnine.exceptions import PlotnineError

# Module-level plot built once at import: mtcars 'wt' against 'mpg', with
# each row's 'name' drawn as a text label.
p = (ggplot(aes(x='wt', y='mpg', label='name'), data=mtcars)
     + geom_text())


def sequential_filenames():
    """
    Yield 'filename-0.png' through 'filename-99.png', in order.

    The generator is exhausted after 100 names.
    """
    template = 'filename-{}.png'
    for index in range(100):
        yield template.format(index)


# Module-level generator instance, created once at import; each next() call
# on it yields the next name in the sequence.
filename_gen = sequential_filenames()


def assert_file_exist(filename, msg=None):
    """
    Assert that ``filename`` exists on disk.

    :param filename: path checked with ``os.path.exists``.
    :param msg: assertion message; a falsy value selects the default
        "File <filename> does not exist".
    :raises AssertionError: if the path does not exist.
    """
    message = msg or "File {} does not exist".format(filename)
    assert os.path.exists(filename), message
示例#36
0
# Switch plt to interactive mode (plt is presumably matplotlib.pyplot --
# its import is outside this excerpt).
plt.ion()


# Project-local data module supplying the expression matrices and the sample
# annotations, both indexed by dataset name.
import RestrictedData
xnorms = RestrictedData.xnorms
annots = RestrictedData.annots


# Two-component t-SNE of the 'shen' matrix; random_state is fixed so the
# embedding is repeatable.
tsne = TSNE(n_components=2, verbose=1,
            perplexity=10, method='barnes_hut', angle=0.5,
            init='pca', early_exaggeration=12, learning_rate=200,
            n_iter=1000, random_state=123)
tsneResults = tsne.fit_transform(xnorms['shen'].values)


# Plotting frame: the sample id, its 'System' annotation (reindexed to the
# row order of the matrix) and the two embedding coordinates.
ggd = pd.DataFrame({'sample' : xnorms['shen'].index,
                    'system' : annots['shen'].reindex(xnorms['shen'].index)['System'],
                    'coord1' : tsneResults[:, 0],
                    'coord2' : tsneResults[:, 1]})
plt.close()
# Scatter of the embedding coloured by system, with each sample's name drawn
# 9 units above its point.
ggo = gg.ggplot(ggd, gg.aes(x='coord1', y='coord2', color='system', label='sample'))
ggo += gg.geom_point()
ggo += gg.geom_text(nudge_y=9, show_legend=False)
# Six manual colours -- presumably one per distinct 'system' value; verify
# against the data.
ggo += gg.scale_color_manual(values=['firebrick', 'goldenrod', 'lightseagreen',
                                     'darkorchid', 'darkslategray', 'dodgerblue'])
ggo += gg.theme_bw()
ggo += gg.xlab('tSNE coordinate 1')
ggo += gg.ylab('tSNE coordinate 2')
# Printing the plot object presumably renders the figure (gg looks like
# plotnine) -- confirm.
print(ggo)