Example #1
File: figures.py Project: jetstreamin/qb
 def plot_compare_accuracy(self, expo=False):
     if expo:
         return (ggplot(self.acc_df) + facet_wrap('position') +
                 aes(x='guesser', y='accuracy', fill='Dataset') +
                 geom_bar(stat='identity', position='dodge') +
                 xlab('Guessing Model') + ylab('Accuracy'))
     else:
         return (ggplot(self.acc_df) + facet_wrap('position') +
                 aes(x='guesser', y='accuracy') + geom_bar(stat='identity'))
Example #2
File: core.py Project: ozacas/asxtrade
def plot_cumulative_returns(wanted_stocks: Iterable[str],
                            ld: LazyDictionary) -> p9.ggplot:
    df = ld["cip_df"]
    df = df.filter(wanted_stocks, axis=0).filter(regex=r"^\d", axis=1)
    dates = set(df.columns)
    movers = df.copy()  # copy so adding asx_code below does not also mutate df
    movers["asx_code"] = movers.index
    movers = movers.melt(id_vars="asx_code", value_vars=dates)
    movers = movers[(movers["value"] < -5.0) |
                    (movers["value"] > 5.0)]  # ignore small movers
    # print(movers)
    movers["fetch_date"] = pd.to_datetime(movers["fetch_date"],
                                          format="%Y-%m-%d")

    # positive and negative stocks need separate dataframes, otherwise the plotnine plot will be wrong
    # print(df)
    pos_df = df.agg([positive_sum])
    neg_df = df.agg([negative_sum])
    pos_df = pos_df.melt(value_vars=dates)
    neg_df = neg_df.melt(value_vars=dates)
    pos_df["fetch_date"] = pd.to_datetime(pos_df["fetch_date"],
                                          format="%Y-%m-%d")
    neg_df["fetch_date"] = pd.to_datetime(neg_df["fetch_date"],
                                          format="%Y-%m-%d")

    plot = (
        p9.ggplot()
        + p9.geom_bar(p9.aes(x="fetch_date", y="value"), data=pos_df,
                      stat="identity", fill="green")
        + p9.geom_bar(p9.aes(x="fetch_date", y="value"), data=neg_df,
                      stat="identity", fill="red")
        + p9.geom_point(p9.aes(x="fetch_date", y="value", fill="asx_code"),
                        data=movers, size=3,
                        position=p9.position_dodge(width=0.4), colour="black")
    )
    return user_theme(
        plot,
        y_axis_label="Cumulative Return (%)",
        legend_position="right",
        asxtrade_want_cmap_d=False,
        # points (stocks) are filled with the user-chosen theme, but everything else is fixed
        asxtrade_want_fill_d=True,
    )
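The comment in plot_cumulative_returns about keeping positive and negative values in separate dataframes is the key idea; a minimal self-contained sketch of that layering pattern (toy data only, none of the asxtrade helpers) might look like this:

import pandas as pd
import plotnine as p9

# Toy daily returns; split into positive and negative frames so each
# geom_bar layer can use a single fixed fill colour.
toy = pd.DataFrame({"fetch_date": pd.date_range("2021-01-01", periods=6),
                    "value": [2.0, -1.5, 3.0, -4.0, 1.0, -0.5]})
pos_df = toy[toy["value"] >= 0]
neg_df = toy[toy["value"] < 0]

sketch = (p9.ggplot()
          + p9.geom_bar(p9.aes(x="fetch_date", y="value"), data=pos_df,
                        stat="identity", fill="green")
          + p9.geom_bar(p9.aes(x="fetch_date", y="value"), data=neg_df,
                        stat="identity", fill="red"))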
Example #3
def test_after_scale_mapping():
    df = pd.DataFrame({'x': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]})
    df2 = pd.DataFrame({
        # Same as above, but add 2 of each unique element
        'x': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4] + [1, 2, 3, 4] * 2
    })

    p = ggplot(df) + geom_bar(aes(x='x', ymax=after_scale('ymax + 2')))
    p2 = ggplot(df2) + geom_bar(aes(x='x'))

    assert p + lims(y=(0, 7)) == 'after_scale_mapping'
    assert p2 + lims(y=(0, 7)) == 'after_scale_mapping'
Example #4
File: figures.py Project: Pinafore/qb
 def plot_compare_accuracy(self, expo=False):
     if expo:
         return (
             ggplot(self.acc_df) + facet_wrap('position')
             + aes(x='guesser', y='accuracy', fill='Dataset')
             + geom_bar(stat='identity', position='dodge')
             + xlab('Guessing Model')
             + ylab('Accuracy')
         )
     else:
         return (
             ggplot(self.acc_df) + facet_wrap('position')
             + aes(x='guesser', y='accuracy')
             + geom_bar(stat='identity')
         )
Example #5
def test_calculated_aes():
    # after_stat('ae')
    mapping1 = aes('x', y=after_stat('density'))
    mapping2 = aes('x', y=after_stat('density*2'))
    mapping3 = aes('x', y=after_stat('density + count'))
    mapping4 = aes('x', y=after_stat('func(density)'))

    def _test():
        assert list(mapping1._calculated.keys()) == ['y']
        assert list(mapping2._calculated.keys()) == ['y']
        assert list(mapping3._calculated.keys()) == ['y']
        assert list(mapping4._calculated.keys()) == ['y']

        assert mapping1['y'].after_stat == 'density'
        assert mapping2['y'].after_stat == 'density*2'
        assert mapping3['y'].after_stat == 'density + count'
        assert mapping4['y'].after_stat == 'func(density)'

        assert mapping1._calculated['y'] == 'density'
        assert mapping2._calculated['y'] == 'density*2'
        assert mapping3._calculated['y'] == 'density + count'
        assert mapping4._calculated['y'] == 'func(density)'

    _test()

    # 'stat(ae)', DEPRECATED but still works
    mapping1 = aes('x', y='stat(density)')
    mapping2 = aes('x', y='stat(density*2)')
    mapping3 = aes('x', y='stat(density + count)')
    mapping4 = aes('x', y='stat(func(density))')
    _test()

    # '..ae..', DEPRECATED but still works
    mapping1 = aes('x', y='..density..')
    mapping2 = aes('x', y='..density..*2')
    mapping3 = aes('x', y='..density.. + ..count..')
    mapping4 = aes('x', y='func(..density..)')
    _test()

    df = pd.DataFrame({'x': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]})
    p = ggplot(df) + geom_bar(aes(x='x', fill=after_stat('count + 2')))
    p.draw_test()

    p = ggplot(df) + geom_bar(aes(x='x', fill='stat(count + 2)'))
    p.draw_test()

    p = ggplot(df) + geom_bar(aes(x='x', fill='..count.. + 2'))
    p.draw_test()
Example #6
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
  """Output bar plot of bsuite data."""
  df = _clean_bar_plot_data(df_in, sweep_vars)

  p = (gg.ggplot(df)
       + gg.aes(x='env', y='score', colour='type', fill='type')
       + gg.geom_bar(position='dodge', stat='identity')
       + gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
       + gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS)
       + gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS)
       + gg.xlab('experiment')
       + gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))
      )
  if not all(df.finished):  # add a layer of alpha for unfinished jobs
    p += gg.aes(alpha='finished')
    p += gg.scale_alpha_discrete(range=[0.3, 1.0])

  # Compute the necessary size of the plot
  if sweep_vars:
    p += gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
    n_hypers = df[sweep_vars].drop_duplicates().shape[0]
  else:
    n_hypers = 1
  return p + gg.theme(figure_size=(14, 3 * n_hypers + 1))
Example #7
def plot_mem(df):
    x = df.copy()
    # initialise some extra columns useful for plotting
    x['new_cols'] = [str(i) for i in x['col_name']]
    x['new_cols'] = pd.Categorical(x['new_cols'],
                                   categories=x['new_cols'],
                                   ordered=True)
    x['cnt_print_loc_pos'] = (x.pcnt.values) + (np.max(x.pcnt.values)) / 70
    x['cnt_print_loc_neg'] = (x.pcnt.values) - (np.max(x.pcnt.values)) / 70
    # build basic plot
    ggplt = (p9.ggplot(x, p9.aes(x='new_cols', y='pcnt', fill='new_cols'))
             + p9.geom_bar(stat='identity')
             + p9.guides(fill=False)
             + p9.ylab('% of total size')
             + p9.xlab('')
             + p9.theme(axis_text_x=p9.element_text(rotation=45, hjust=1)))

    # add text labels to the highest bars
    y1 = x.copy()[x.pcnt > 0.3 * np.max(x.pcnt)]
    ggplt = ggplt + p9.geom_text(
        p9.aes(x='new_cols', y='cnt_print_loc_neg', label='size', fill='col_name'),
        inherit_aes=False, data=y1, color='white', angle=90, vjust='top')
    # add text labels to the lower bars
    y2 = x.copy()[x.pcnt <= 0.3 * np.max(x.pcnt)]
    ggplt = ggplt + p9.geom_text(
        p9.aes(x='new_cols', y='cnt_print_loc_pos', label='size', fill='col_name'),
        inherit_aes=False, data=y2, color='gray', angle=90, vjust='bottom')
    return ggplt
Example #8
    def plot_data_point(self, data_point_ix, use_base=True,
                        figure_size=(8, 6)):
        """ Plot Shapley values for an individual data point

        Parameters
        ----------
        data_point_ix :  int
        use_base : boolean, optional default=True

        Returns
        -------
        g : ggplot object
        """
        # Check Shapley values exist
        if self._shapley_values is None:
            raise Exception("No Shapley values are available")

        d = self.get_shapley_values().loc[[data_point_ix]]

        if not use_base:
            d = d.drop("BASE", axis=1)

        g = (ggplot(d.reset_index(drop=False).melt(
            id_vars="index"),
            aes(x="variable", y="value", fill="variable")) +
            geom_bar(stat="identity") +
            labs(title="Shapley values (Index: " + str(data_point_ix) + ")",
                 x="Feature",
                 y="Shapley value",
                 fill="Feature") +
            coord_flip())
        g += theme(figure_size=figure_size)

        return g
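The flipped Shapley bar plot above depends on the surrounding explainer class; the plotting pattern itself can be sketched standalone with a hypothetical single row of values:

import pandas as pd
from plotnine import ggplot, aes, geom_bar, labs, coord_flip

# Hypothetical Shapley values for one data point (index 0); column names are illustrative.
d = pd.DataFrame({"feat_a": [0.4], "feat_b": [-0.2], "BASE": [0.1]}, index=[0])
g = (ggplot(d.reset_index(drop=False).melt(id_vars="index"),
            aes(x="variable", y="value", fill="variable"))
     + geom_bar(stat="identity")
     + labs(title="Shapley values (Index: 0)", x="Feature",
            y="Shapley value", fill="Feature")
     + coord_flip())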
Example #9
def cell_cycle_phase_barplot(adata, palette='Set2'):
    """Plots the proportion of cells in each phase of the cell cycle

    See also: cell_cycle_phase_pieplot for the matplotlib pie chart


    Parameters
    -----------
    adata: AnnData
        The AnnData object being used for the analysis. Must be previously
        evaluated by `tl.annotate_cell_cycle`.

    Returns
    -----------
    A plotnine barplot with the total counts of cell in each phase of the
    cell cycle.

    """
    plt_data = adata.obs.copy()
    plt_data['cell_cycle_phase'] = pd.Categorical(
        plt_data['cell_cycle_phase'],
        categories=['G1 post-mitotic', 'G1 pre-replication', 'S/G2/M'])

    cycle_plot = (
        ggplot(plt_data, aes('cell_cycle_phase', fill='cell_cycle_phase')) +
        geom_bar() + coord_flip() + guides(fill=False) +
        labs(y='', x='Cell cycle phase') + theme_light() +
        theme(panel_grid_major_y=element_blank(),
              panel_grid_minor_y=element_blank(),
              panel_grid_major_x=element_line(size=1.5),
              panel_grid_minor_x=element_line(size=1.5)) +
        scale_fill_brewer(type='qual', palette=palette))

    return cycle_plot
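For readers without an AnnData object at hand, the same bar plot can be sketched from a plain DataFrame standing in for adata.obs (hypothetical per-cell phase labels):

import pandas as pd
from plotnine import (ggplot, aes, geom_bar, coord_flip, guides, labs,
                      theme_light)

# Hypothetical per-cell phase annotations standing in for adata.obs.
plt_data = pd.DataFrame({"cell_cycle_phase": ["G1 post-mitotic",
                                              "G1 pre-replication",
                                              "S/G2/M", "S/G2/M"]})
plt_data["cell_cycle_phase"] = pd.Categorical(
    plt_data["cell_cycle_phase"],
    categories=["G1 post-mitotic", "G1 pre-replication", "S/G2/M"])
sketch = (ggplot(plt_data, aes("cell_cycle_phase", fill="cell_cycle_phase"))
          + geom_bar() + coord_flip() + guides(fill=False)
          + labs(y="", x="Cell cycle phase") + theme_light())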
Example #10
File: plots.py Project: mappin/asxtrade
def plot_breakdown(cip_df: pd.DataFrame):
    """Stacked bar plot of increasing and decreasing stocks per sector in the specified df"""
    cols_to_drop = [colname for colname in cip_df.columns if colname.startswith('bin_')]
    df = cip_df.drop(columns=cols_to_drop)
    df = pd.DataFrame(df.sum(axis='columns'), columns=['sum'])
    df = df.merge(stocks_by_sector(), left_index=True, right_on='asx_code')

    if len(df) == 0: # no stock in cip_df have a sector? ie. ETF?
        return None

    assert set(df.columns) == set(['sum', 'asx_code', 'sector_name'])
    df['increasing'] = df.apply(lambda row: 'up' if row['sum'] >= 0.0 else 'down', axis=1)
    sector_names = df['sector_name'].value_counts().index.tolist() # sort bars by value count (ascending)
    sector_names_cat = pd.Categorical(df['sector_name'], categories=sector_names)
    df = df.assign(sector_name_cat=sector_names_cat)

    #print(df)
    plot = (
        p9.ggplot(df, p9.aes(x='factor(sector_name_cat)', fill='factor(increasing)'))
        + p9.geom_bar()
        + p9.labs(x="Sector", y="Number of stocks")
        + p9.theme(axis_text_y=p9.element_text(size=7), 
                   subplots_adjust={"left": 0.2, 'right': 0.85},
                   legend_title=p9.element_blank()
                  )
        + p9.coord_flip()
    )
    return plot_as_inline_html_data(plot)
Example #11
def test_stat_count_float():
    df = pd.DataFrame({'x': ['a', 'b'], 'weight': [1.5, 2.5]})

    p = (ggplot(df) + aes(x='x', weight='weight', fill='x') + geom_bar() +
         geom_text(aes(label=after_stat('count')), stat='count'))

    assert p + _theme == 'stat-count-float'
Example #12
def plot_vs_discrete(data_table,
                     discrete_metric_name,
                     metric_name,
                     segment_name,
                     title,
                     ylim=None,
                     aggregate="mean"
                     ):
    data_filtered = \
        data_table.loc[((pd.notnull(data_table[metric_name])) & (pd.notnull(data_table[discrete_metric_name])))][
            [discrete_metric_name, metric_name, segment_name]]

    data_filtered[[metric_name]] = data_filtered[[metric_name]].astype(float)
    result = data_filtered.groupby([discrete_metric_name, segment_name]).agg({metric_name: aggregate}).reset_index()
    result[metric_name] = round(result[metric_name], 3)

    gg_result = (plot.ggplot(result)
                 + plot.aes(x=discrete_metric_name, y=metric_name,
                            fill=segment_name, label=metric_name)
                 + plot.geom_bar(stat="identity", position="dodge")
                 + plot.geom_text(position=plot.position_dodge(width=.9), size=8)
                 + plot.labs(x=discrete_metric_name,
                             y=aggregate + "(" + metric_name + ")",
                             title=title))

    if ylim is not None:  # pd.notnull() on a (low, high) tuple is ambiguous
        gg_result = gg_result + plot.ylim(ylim)

    return gg_result
Example #13
def test_stat_count_int():
    df = pd.DataFrame({'x': ['a', 'b'], 'weight': [1, 2]})

    p = (ggplot(df) + aes(x='x', weight='weight', fill='x') + geom_bar() +
         geom_text(aes(label='stat(count)'), stat='count'))

    assert p + _theme == 'stat-count-int'
Example #14
def plot_downstream(clwe, table, output, ylim):
    df = pd.read_csv(data_file(table))
    df = df[df.clwe == clwe]
    df = df.assign(
        refine=pd.Categorical(df['refine'], ['Original', '+retrofit', '+synthetic']),
        language=pd.Categorical(df['language'], ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG'])
    )
    g = p9.ggplot(df, p9.aes(x='language', y='accuracy', fill='refine'))
    g += p9.geom_bar(position='dodge', stat='identity', width=.8)
    g += p9.coord_cartesian(ylim=ylim)
    g += p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
    g += p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
    g += p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3)
    )
    g.save(filename=output_file(output))
Example #15
def plot_trend(sample_period="M", ld: LazyDictionary = None) -> str:
    """
    Given a dataframe of a single stock from company_prices() this plots the highest price
    in each month over the time period of the dataframe.
    """
    assert "stock_df" in ld

    def inner_date_fmt(dates_to_format):
        results = []
        for d in dates_to_format:
            # breaks are set to the end of the month rather than the start, so
            # shift back roughly a month before formatting
            d -= timedelta(weeks=4)
            results.append(d.strftime("%Y-%m"))
        return results

    stock_df = ld["stock_df"]
    # print(stock_df)
    dataframe = stock_df.filter(items=["last_price"])
    dataframe.index = pd.to_datetime(dataframe.index, format="%Y-%m-%d")
    dataframe = dataframe.resample(sample_period).max()
    # print(dataframe)
    plot = (
        p9.ggplot(
            dataframe,
            p9.aes(x="dataframe.index",
                   y=dataframe.columns[0],
                   fill=dataframe.columns[0]),
        )
        + p9.geom_bar(stat="identity", alpha=0.7)
        # don't print the day (always the 1st of the month due to resampling)
        + p9.scale_x_datetime(labels=inner_date_fmt)
    )
    return user_theme(plot,
                      y_axis_label="$ AUD",
                      asxtrade_want_fill_continuous=True)
Example #16
def test_coord_trans_reverse():
    # coord_trans can reverse continuous and discrete data
    p = (ggplot(df, aes('factor(x)'))
         + geom_bar(aes(fill='factor(z)'), show_legend=False)
         + coord_trans(x='reverse', y='reverse')
         )
    assert p == 'coord_trans_reverse'
Example #17
def summary(tags, opts=None):
    print(tags)
    tags_summary = (
        tags.groupby(["tag", "background"])
        .agg({"tag": "count"})
        .rename(columns={"tag": "n_tags"})
        .reset_index()
        .astype({"background": "category", "tag": "category"})
    )
    print(tags_summary)
    # tags_summary = tags_df.groupby(["species"]).agg(
    #     {"tag_duration": "sum", "species": "count"}
    # )

    # tags_summary.rename(columns={"species": "count"}, inplace=True)

    # tags_summary["tag_duration"] = tags_summary.tag_duration.astype(int)
    # tags_summary["duration"] = tags_summary.tag_duration.astype(str) + "s"
    # tags_summary = tags_summary.reindex(list(SPECIES_LABELS.keys()))
    # # tags_summary["species"] = tags_summary.index
    # tags_summary.reset_index(inplace=True)
    # tags_summary
    # (
    #     ggplot(
    #         data=tags_summary,
    #         mapping=aes(
    #             x="factor(species, ordered=False)",
    #             y="tag_duration",
    #             fill="factor(species, ordered=False)",
    #         ),
    #     )
    #     + geom_bar(stat="identity", show_legend=False)
    #     + xlab("Species")
    #     + ylab("Duration of annotations (s)")
    #     + geom_text(mapping=aes(label="count"), nudge_y=15)
    #     + theme_classic()
    #     + scale_x_discrete(limits=SPECIES_LIST, labels=xlabels)
    # ).save("species_repartition_duration_mini.png", width=10, height=8)

    plt = (
        ggplot(
            data=tags_summary,
            mapping=aes(
                x="tag",  # "factor(species, ordered=False)",
                y="n_tags",
                fill="background",  # "factor(species, ordered=False)",
            ),
        )
        + geom_bar(stat="identity", show_legend=True, position=position_dodge())
        + xlab("Species")
        + ylab("Number of annotations")
        + geom_text(mapping=aes(label="n_tags"), nudge_y=15)
        + theme_classic()
        + theme(axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}))
        # + scale_x_discrete(limits=SPECIES_LIST, labels=xlabels)
    ).save("tag_species_bg.png", width=10, height=8)
    # print(tags_summary)

    print(plt)
Example #18
File: plots.py Project: mappin/asxtrade
def plot_point_scores(stock: str, sector_companies, all_stocks_cip: pd.DataFrame, rules):
    """
    Visualise the stock in terms of point scores as described on the stock view page. Rules to apply
    can be specified by rules (default rules are provided by rule_*())

    Points are lost for equivalent downturns and the result plotted. All rows in all_stocks_cip will be
    used to calculate the market average on a given trading day, whilst only sector_companies will
    be used to calculate the sector average. A utf-8 base64 encoded plot image is returned
    """
    assert len(stock) >= 3
    assert all_stocks_cip is not None
    assert rules is not None and len(rules) > 0

    rows = []
    points = 0
    day_low_high_df = day_low_high(stock, all_dates=all_stocks_cip.columns)
    state = {
        "day_low_high_df": day_low_high_df,  # never changes each day, so we init it here
        "all_stocks_change_in_percent_df": all_stocks_cip,
        "stock": stock,
        "daily_range_threshold": 0.20,  # 20% at either end of the daily range gets a point
    }
    net_points_by_rule = defaultdict(int)
    for date in all_stocks_cip.columns:
        market_avg = all_stocks_cip[date].mean()
        sector_avg = all_stocks_cip[date].filter(items=sector_companies).mean()
        stock_move = all_stocks_cip.at[stock, date]
        state.update(
            {
                "market_avg": market_avg,
                "sector_avg": sector_avg,
                "stock_move": stock_move,
                "date": date,
            }
        )
        points += sum(map(lambda r: r(state), rules))
        for r in rules:
            k = r.__name__
            if k.startswith("rule_"):
                k = k[5:]
            net_points_by_rule[k] += r(state)
        rows.append({"points": points, "stock": stock, "date": date})

    df = pd.DataFrame.from_records(rows)
    df["date"] = pd.to_datetime(df["date"])
    point_score_plot = plot_series(df, x="date", y="points")

    rows = []
    for k, v in net_points_by_rule.items():
        rows.append({"rule": str(k), "net_points": v})
    df = pd.DataFrame.from_records(rows)
    net_rule_contributors_plot = (
        p9.ggplot(df, p9.aes(x="rule", y="net_points"))
        + p9.labs(x="Rule", y="Contribution to points by rule")
        + p9.geom_bar(stat="identity")
        + p9.theme(axis_text_y=p9.element_text(size=7), subplots_adjust={"left": 0.2})
        + p9.coord_flip()
    )
    return point_score_plot, plot_as_inline_html_data(net_rule_contributors_plot)
Example #19
 def plot_compare_accuracy(self, expo=False):
     if expo:
         return (
             ggplot(self.acc_df)
             + facet_wrap("position")
             + aes(x="guesser", y="accuracy", fill="Dataset")
             + geom_bar(stat="identity", position="dodge")
             + xlab("Guessing Model")
             + ylab("Accuracy")
         )
     else:
         return (
             ggplot(self.acc_df)
             + facet_wrap("position")
             + aes(x="guesser", y="accuracy")
             + geom_bar(stat="identity")
         )
Example #20
def duration_graph(Data, Data_m):
    print('======= Creating duration_graph =======')
    # Filter current year and month, and correct Duration
    #Graph2_ALL.Duration = Graph2_ALL.Duration/60
    #Graph2_ALL.Duration = Graph2_ALL.Duration.astype(str)
    x = Data.Duration[pd.isna(Data.Duration)]
    if len(x) == len(Data):
        logging.warning('=================================Graph_2 aborted =============================')
        return
    Graph2 = Data_m[Data_m.Duration < 180]
    Graph2_ALL = Data[Data.Duration < 180]
    if len(Graph2_ALL) > 0:
        plot = (p9.ggplot(data=Graph2_ALL, mapping=p9.aes(x='Duration'))
                + p9.geom_bar(fill='red', stat='count', size=100)
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=40),
                           axis_title=p9.element_text(size=40, face='bold'))
                + p9.labs(title='', x='', y='No. of attacks'))
        plot.save(filename='Graph_ALL_2.jpeg', plot=plot,
                  path="pdf/iteration/", width=25, height=5, dpi=320)
    else:
        print('Plot not created; no data found.')
    if len(Graph2) > 0:
        plot_month = (p9.ggplot(data=Graph2, mapping=p9.aes(x='Duration'))
                      + p9.geom_bar(fill='red', stat='count', size=100)
                      + p9.theme_classic()
                      + p9.theme(axis_text=p9.element_text(size=40),
                                 axis_title=p9.element_text(size=40, face='bold'))
                      + p9.labs(title='', x='', y='No. of attacks'))
        plot_month.save(filename='Graph_2.jpeg', plot=plot_month,
                        path="pdf/iteration/", width=25, height=5, dpi=320)
    else:
        print('Plot not created; no data found.')
    print('=================================duration_graph DONE =============================')
Example #21
def test_discrete_xy_scale_drop_limits():
    df = pd.DataFrame({
        'x': list('aaaabbbbccccddd'),
        'c': list('112312231233123')
    })

    p = (ggplot(df) + geom_bar(aes(x='x', fill='c')) +
         scale_x_discrete(limits=list('abc')))
    assert p == 'discrete_xy_scale_drop_limits'
Example #22
def make_bar_plot(data, x, y):
    """Make a bar plot between two variables data[x] and data[y]."""
    # Keep all layers inside one parenthesised expression and return the plot;
    # in the original the theme/labs lines were dangling statements that never
    # reached the plot object.
    return (p9.ggplot(data=data, mapping=p9.aes(x=x, y=y))
            + p9.geom_bar(stat='identity')
            + p9.theme(axis_text_x=p9.element_text(angle=90))
            + p9.labs(title='{} By {}'.format(x, y)))
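Assuming the corrected make_bar_plot above (which now returns the plot object) is in scope, a hypothetical call could be:

import pandas as pd
import plotnine as p9  # required by make_bar_plot itself

# Toy data; column names are illustrative only.
scores = pd.DataFrame({"name": ["a", "b", "c"], "score": [3, 1, 2]})
p = make_bar_plot(scores, x="name", y="score")
p.save("scores.png", width=6, height=4)  # or p.draw() in a notebook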
Example #23
def make_single_bar_chart_multi_year(survey_data, column, facet, proportionally=False):
    """Make a barchart showing the number of respondents responding to a single column.
        Bars are colored by which year of the survey they correspond to. If facet
        is not empty, the resulting plot will be faceted into subplots by the variables
        given. 

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey   
        column (str): Column to plot responses to
        facet (list,optional): List of columns use for grouping
        proportionally (bool, optiona ): Defaults to False. If True,
            the bars heights are determined proportionally to the 
            total number of responses in that facet. 

    Returns:
        (plotnine.ggplot): Plot object which can be displayed in a notebook or saved out to a file

    """
    cols = [column, facet]
    show_legend = False
    topic_data = survey_data[cols + ["year"]]

    topic_data_long = make_long(topic_data, facet, multi_year=True)

    if proportionally:
        proportions = (
            topic_data_long[topic_data_long.rating == 1].groupby(facet + ["year"]).sum()
            / topic_data_long.groupby(facet + ["year"]).sum()
        ).reset_index()
    else:
        proportions = (
            topic_data_long[topic_data_long.rating == 1]
            .groupby(facet + ["year"])
            .count()
            .reset_index()
        )

    x = topic_data_long.columns.tolist()
    x.remove("level_1")

    ## Uncomment to return dataframe instead of plot
    # return proportions

    return (
        p9.ggplot(proportions, p9.aes(x=facet, fill="year", y="level_1"))
        + p9.geom_bar(show_legend=show_legend, stat="identity")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        + p9.scale_x_discrete(
            limits=topic_data_long[facet].unique().tolist(),
            labels=[
                x.replace("_", " ") for x in topic_data_long[facet].unique().tolist()
            ],
        )
    )
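The proportional-versus-raw-count logic above can be illustrated on its own with toy long-format data (hypothetical column names, none of the survey helpers such as make_long):

import pandas as pd
import plotnine as p9

# Hypothetical long-format responses: one row per respondent/option pair.
long_df = pd.DataFrame({
    "role":   ["dev", "dev", "ops", "ops", "ops", "dev"],
    "year":   ["2019", "2019", "2019", "2020", "2020", "2020"],
    "rating": [1, 0, 1, 1, 0, 1],
})
# Raw counts of positive ratings per group; divide by group sizes instead
# to get the proportional variant.
counts = (long_df[long_df.rating == 1]
          .groupby(["role", "year"]).size().reset_index(name="n"))
sketch = (p9.ggplot(counts, p9.aes(x="role", y="n", fill="year"))
          + p9.geom_bar(stat="identity", position="dodge"))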
Example #24
def plot_grades(dat):
    import plotnine as p9

    # Use parentheses, not braces: {} would build a one-element set around
    # the ggplot object instead of returning the plot itself.
    p = (
        p9.ggplot(dat, p9.aes('grade')) +
        p9.geom_bar() +
        p9.facet_wrap('keywords')
    )

    return p
Example #25
def test_dodge_preserve_single_text():
    df1 = pd.DataFrame({'x': ['a', 'b', 'b', 'b'], 'y': ['a', 'a', 'b', 'b']})

    d = position_dodge(preserve='single', width=0.9)
    p = (ggplot(df1, aes('x', fill='y')) + geom_bar(position=d) +
         geom_text(aes(y=after_stat('count'), label=after_stat('count')),
                   stat='count',
                   position=d,
                   va='bottom'))
    assert p + _theme == 'dodge_preserve_single_text'
Example #26
def plotfreq(freqdf):
    '''
    Plot the output of freq() as a bar chart.

    Parameters
    ----------
    freqdf  dataframe generated by freq()

    Returns
    -------
    Bar chart with frequencies & percentages in descending order
        
    Example 
    -------
    import exploretransform as et
    df, X, y = et.loadboston()
    et.plotfreq(et.freq(X['town']))

    Warning 
    -------
    This function will likely not plot more than 100 unique levels properly.
    
    ---------- 
    '''

    # input checks
    if isinstance(freqdf, (pd.core.frame.DataFrame)): pass
    else: return print("\nFunction only accepts dataframes\n")

    if len(freqdf.columns) == 4: pass
    else: return print("\nInput must be a dataframe generated by freq()\n")

    if sum(freqdf.columns[1:4] == ['freq', 'perc', 'cump']) == 3: pass
    else: return print("\nInput must be a dataframe generated by freq()\n")

    if len(freqdf) < 101: pass
    else: return print("\nUnable to plot more than 100 items")

    # label for plot
    lbl = freqdf['freq'].astype(str).str.cat(
        '[ ' + freqdf['perc'].astype(str) + '%' + ' ]', sep='   ')
    # create variable to be used in aes
    aesx = 'reorder(' + freqdf.columns[0] + ', freq)'

    # build plot
    plot = (pn.ggplot(freqdf) +
            pn.aes(x=aesx, y='freq', fill='freq', label=lbl) +
            pn.geom_bar(stat='identity') + pn.coord_flip() +
            pn.theme(axis_text_y=pn.element_text(size=6, weight='bold'),
                     legend_position='none') +
            pn.labs(x=freqdf.columns[0], y="Freq") +
            pn.scale_fill_gradient2(mid='bisque', high='blue') +
            pn.geom_text(size=6, nudge_y=.7))

    return plot
Example #27
def plot_rank_full(df, plot_fn):
    f = (p9.ggplot(df, p9.aes(x="emotion_cat", y="ratio", fill="factor(rank)"))
         + p9.geom_bar(stat="identity") + p9.facet_wrap("cluster_labels_6") +
         p9.labs(x="Model", y="Proportion (%)", fill="Rank") + p9.theme_538() +
         p9.theme(legend_position="top",
                  legend_direction="horizontal",
                  figure_size=(10, 5)) +
         p9.theme(plot_background=p9.element_rect(
             fill=BG_COLOR, color=BG_COLOR, size=1),
                  axis_text_x=p9.element_text(rotation=45, hjust=1)))
    f.save(plot_fn)
Example #28
 def create(self, file_path: str) -> None:
     (ggplot(self._data, aes(x="pattern", y="count", label="fraction")) +
      geom_bar(stat="identity", fill="#1e4f79") +
      geom_text(va='bottom', size=24, format_string='{:.1%}') +
      scale_x_discrete(limits=self._data["pattern"]) +
      scale_y_continuous(labels=comma_format(), expand=[0.1, 0]) +
      ggtitle("Design Pattern Counts") + xlab("Design Pattern") +
      ylab("Count") + theme_classic(base_size=32, base_family="Helvetica") +
      theme(text=element_text(size=32),
            axis_text_x=element_text(rotation=45, ha="right"))).save(
                file_path, width=24, height=8)
Example #29
 def create(self, file_path: str) -> None:
     (ggplot(self._data, aes(x="count", label="..count..")) +
      geom_bar(fill="#1e4f79") +
      geom_text(stat="count", va='bottom', size=24) +
      scale_x_discrete(limits=[
          "1", "2", "3", "5", "26", "52", "97", "100", "300", "537"
      ]) + scale_y_continuous(breaks=[0, 5, 10], limits=[0, 10]) +
      ggtitle("Case Study Sizes") + xlab("Number of Projects") +
      ylab("Number of Case Studies") +
      theme_classic(base_size=28, base_family="Helvetica") +
      theme(text=element_text(size=28))).save(file_path, width=14, height=7)
Example #30
def test_calculated_aes():
    _strip = strip_calculated_markers

    # stat(ae)
    mapping1 = aes('x', y='stat(density)')
    mapping2 = aes('x', y='stat(density*2)')
    mapping3 = aes('x', y='stat(density + count)')
    mapping4 = aes('x', y='func(stat(density))')

    assert get_calculated_aes(mapping1) == ['y']
    assert get_calculated_aes(mapping2) == ['y']
    assert get_calculated_aes(mapping3) == ['y']
    assert get_calculated_aes(mapping4) == ['y']

    assert _strip(mapping1['y']) == 'density'
    assert _strip(mapping2['y']) == 'density*2'
    assert _strip(mapping3['y']) == 'density + count'
    assert _strip(mapping4['y']) == 'func(density)'

    # ..ae..
    mapping1 = aes('x', y='..density..')
    mapping2 = aes('x', y='..density..*2')
    mapping3 = aes('x', y='..density.. + ..count..')
    mapping4 = aes('x', y='func(..density..)')

    assert get_calculated_aes(mapping1) == ['y']
    assert get_calculated_aes(mapping2) == ['y']
    assert get_calculated_aes(mapping3) == ['y']
    assert get_calculated_aes(mapping4) == ['y']

    assert _strip(mapping1['y']) == 'density'
    assert _strip(mapping2['y']) == 'density*2'
    assert _strip(mapping3['y']) == 'density + count'
    assert _strip(mapping4['y']) == 'func(density)'

    df = pd.DataFrame({'x': [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]})
    p = ggplot(df) + geom_bar(aes(x='x', fill='stat(count + 2)'))
    p.draw_test()

    p = ggplot(df) + geom_bar(aes(x='x', fill='..count.. + 2'))
    p.draw_test()
Example #31
def plot_outcome_counts(read_file_1, read_file_2, save_file, plot_dir):
    temp_sub = pd.read_csv(os.path.join(dir_output, read_file_1))
    temp_agg = pd.read_csv(os.path.join(dir_output, read_file_2))
    temp_sub = recode_outcome(temp_sub)
    temp_agg = recode_outcome(temp_agg)
    plot_output = os.path.join(dir_figures, plot_dir)
    dat = pd.concat([temp_agg, temp_sub], axis=0).reset_index(drop=True)
    dat = dat.groupby(['outcome', 'model']).size().reset_index(name='counts')
    img = (ggplot(dat, aes(x='outcome', y='counts', fill='model'))
           + geom_bar(stat='identity', position='dodge')
           + labs(x='Outcome', y='Counts')
           + theme_bw())
    img.save(os.path.join(plot_output, save_file))
Example #32
def test_removes_infinite_values():
    df = mtcars.copy()
    df.loc[[0, 5], 'wt'] = [np.inf, -np.inf]
    p = ggplot(df, aes(x='wt')) + geom_bar()

    with pytest.warns(UserWarning) as record:
        p._build()

    def removed_2_row_with_infinites(record):
        for item in record:
            msg = str(item.message).lower()
            if '2 rows' in msg and 'non-finite' in msg:
                return True
        return False

    assert removed_2_row_with_infinites(record)
Example #33
def test_dodge():
    p = (ggplot(df2, aes('factor(z)')) +
         geom_bar(aes(fill='factor(x)'), position='dodge'))
    assert p + _theme == 'dodge'
Example #34
File: logo.py Project: jwhendy/plotnine
gradient = (
    (0.99, 0.88, 0.87),
    (0.98, 0.62, 0.71),
    (0.86, 0.20, 0.59),
    bcolor, bcolor,
    bcolor_darker, bcolor_darker)

df1 = df[:n//3:9]
df2 = df[n//3:2*n//3]
df3 = df[2*n//3::12]

p = (ggplot(aes('x', 'y', color='y', fill='y'))
     + annotate(geom='label', x=0.295, y=0.495, label='pl  tnine',
                label_size=1.5, label_padding=.1, size=24,
                fill=bcolor_lighter, color=bcolor)
     + geom_point(df1, size=8, stroke=0, show_legend=False)
     + geom_line(df2, size=2, color=bcolor_darker, show_legend=False)
     + geom_bar(df3, aes('x+.06'), stat='identity', size=0, show_legend=False)

     + scale_color_gradientn(colors=gradient)
     + scale_fill_gradientn(colors=gradient)
     + theme_void()
     + theme(figure_size=(3.6, 3.6)))

p.save('logo.pdf', pad_inches=-0.04)

# Remove the project name
p.layers = p.layers.__class__(p.layers[1:])
p.save('logo-small.pdf', pad_inches=-0.04)
Example #35
from __future__ import absolute_import, division, print_function

import numpy as np
import pandas as pd
from mizani.transforms import trans_new

from plotnine import (ggplot, aes, geom_bar, coord_flip,
                      coord_fixed, coord_trans)

n = 10  # Some even number greater than 2

# ladder: 0 1 times, 1 2 times, 2 3 times, ...
df = pd.DataFrame({'x': np.repeat(range(n+1), range(n+1)),
                   'z': np.repeat(range(n//2), range(3, n*2, 4))})

p = (ggplot(df, aes('x'))
     + geom_bar(aes(fill='factor(z)'), show_legend=False))


def test_coord_flip():
    assert p + coord_flip() == 'coord_flip'


def test_coord_fixed():
    assert p + coord_fixed(0.5) == 'coord_fixed'


def test_coord_trans():
    double_trans = trans_new('double', np.square, np.sqrt)
    assert p + coord_trans(y=double_trans) == 'coord_trans'
Example #36
def test_calculated_expressions():
    p = (ggplot(mtcars, aes(x='factor(cyl)', y='..count..+1'))
         + geom_bar())
    # No exception
    p._build()
Example #37
def test_dodge_preserve_single():
    df1 = pd.DataFrame({'x': ['a', 'b', 'b'],
                        'y': ['a', 'a', 'b']})
    p = (ggplot(df1, aes('x', fill='y')) +
         geom_bar(position=position_dodge(preserve='single')))
    assert p + _theme == 'dodge_preserve_single'
Example #38
File: jmlr.py Project: Pinafore/qb
def error_comparison():
    char_frames = {}
    first_frames = {}
    full_frames = {}
    train_times = {}
    use_wiki = {}
    best_accuracies = {}
    for p in glob.glob(f'output/guesser/best/qanta.guesser*/guesser_report_guesstest.pickle', recursive=True):
        with open(p, 'rb') as f:
            report = pickle.load(f)
            name = report['guesser_name']
            params = report['guesser_params']
            train_times[name] = params['training_time']
            use_wiki[name] = params['use_wiki'] if 'use_wiki' in params else False
            char_frames[name] = report['char_df']
            first_frames[name] = report['first_df']
            full_frames[name] = report['full_df']
            best_accuracies[name] = (report['first_accuracy'], report['full_accuracy'])
    first_df = (pd.concat(list(first_frames.values()))
                .sort_values('score', ascending=False)
                .groupby(['guesser', 'qanta_id'])
                .first().reset_index())
    first_df['position'] = ' Start'
    full_df = (pd.concat(list(full_frames.values()))
               .sort_values('score', ascending=False)
               .groupby(['guesser', 'qanta_id'])
               .first().reset_index())
    full_df['position'] = 'End'
    compare_df = pd.concat([first_df, full_df])
    compare_df = compare_df[compare_df.guesser != 'qanta.guesser.vw.VWGuesser']
    compare_results = {}
    comparisons = ['qanta.guesser.dan.DanGuesser', 'qanta.guesser.rnn.RnnGuesser', 'qanta.guesser.elasticsearch.ElasticSearchGuesser']
    cr_rows = []
    for (qnum, position), group in compare_df.groupby(['qanta_id', 'position']):
        group = group.set_index('guesser')
        correct_guessers = []
        wrong_guessers = []
        for name in comparisons:
            if group.loc[name].correct == 1:
                correct_guessers.append(name)
            else:
                wrong_guessers.append(name)
        if len(correct_guessers) > 3:
            raise ValueError('this should be unreachable')
        elif len(correct_guessers) == 3:
            cr_rows.append({'qnum': qnum, 'Position': position, 'model': 'All', 'Result': 'Correct'})
        elif len(correct_guessers) == 0:
            cr_rows.append({'qnum': qnum, 'Position': position, 'model': 'All', 'Result': 'Wrong'})
        elif len(correct_guessers) == 1:
            cr_rows.append({
                'qnum': qnum, 'Position': position,
                'model': to_shortname(correct_guessers[0]),
                'Result': 'Correct'
            })
        else:
            cr_rows.append({
                'qnum': qnum, 'Position': position,
                'model': to_shortname(wrong_guessers[0]),
                'Result': 'Wrong'
            })
    cr_df = pd.DataFrame(cr_rows)
    # samples = cr_df[(cr_df.Position == ' Start') & (cr_df.Result == 'Correct') & (cr_df.model == 'RNN')].qnum.values
    # for qid in samples:
    #     q = lookup[qid]
    #     print(q['first_sentence'])
    #     print(q['page'])
    #     print()
    p = (
        ggplot(cr_df)
        + aes(x='model', fill='Result') + facet_grid(['Result', 'Position']) #+ facet_wrap('Position', labeller='label_both')
        + geom_bar(aes(y='(..count..) / sum(..count..)'), position='dodge')
        + labs(x='Models', y='Fraction with Corresponding Result') + coord_flip()
        + theme_fs() + theme(aspect_ratio=.6)
    )
    p.save('output/plots/guesser_error_comparison.pdf')