Пример #1
0
    def run_item_clicked(self, item):
        """Plot the model variable selected in the run tree onto the canvas.

        Every click is logged.  A plot is only drawn when the clicked item
        has a parent whose label is 'Variables'; all other items are ignored.

        Parameters
        ----------
        item
            The clicked tree item; it must provide ``text(0)`` and
            ``parent()``.
        """
        name = item.text(0)
        # lazy %-style arguments: the message is only formatted if emitted
        logging.info('Run item %s clicked', name)

        parent = item.parent()
        if parent is None or parent.text(0) != 'Variables':
            return

        cpt = self._concrete_model.find_component(name)
        df = mo.get_entity(cpt)

        # 'S' is coloured and wrapped by 'States'; every other variable is
        # coloured by 'J' and laid out on a grid ('Q' by 'J', the rest by 'I').
        if name == 'S':
            colour, facet = 'States', pn.facet_wrap('States')
        elif name == 'Q':
            colour, facet = 'J', pn.facet_grid('J~')
        else:
            colour, facet = 'J', pn.facet_grid('I~')

        ff = (pn.ggplot(df, pn.aes('T', name)) + pn.ggtitle(cpt.doc) +
              pn.geom_step(pn.aes(color=colour), direction='hv') + facet)

        # fit the figure to the canvas
        # (assumes roughly 100 canvas units per inch -- TODO confirm)
        size = self.canvas.size()
        ff += pn.theme(figure_size=(size.width() / 100, size.height() / 100))

        # swap the canvas over to the new figure and repaint
        fig = ff.draw()
        self.canvas.figure = fig
        self.canvas.draw()
Пример #2
0
def test_labels():
    """
    Check that axis labels and the title can be set, replaced and validated
    """
    gg = (ggplot(df, aes(x='x', y='y')) + geom_point() +
          xlab('xlab') + ylab('ylab') + ggtitle('title'))
    for key, expected in (('x', 'xlab'), ('y', 'ylab'), ('title', 'title')):
        assert gg.labels[key] == expected

    # labs() replaces all three labels in a single step
    gg = gg + labs(x='xlab2', y='ylab2', title='title2')
    for key, expected in (('x', 'xlab2'), ('y', 'ylab2'),
                          ('title', 'title2')):
        assert gg.labels[key] == expected

    # each of these label specifications is expected to be rejected
    invalid_labels = (
        lambda: xlab(None),
        lambda: ylab(None),
        lambda: ggtitle(None),
        lambda: labs('x', 'y'),
    )
    for make_invalid in invalid_labels:
        with pytest.raises(PlotnineError):
            gg = gg + make_invalid()
Пример #3
0
def plot_hypothesis(hypothesis, file_name):
    """Render the hypothesis-test results as a multi-page PDF.

    For every (bin type, score) combination two bar charts are produced:
    the mean score of both datasets per bin, and the p-value per bin filled
    by whether it is significant at the 0.05 level.

    Parameters
    ----------
    hypothesis
        Nested mapping ``bin type -> score -> iterable of single-entry
        mappings`` (``bin name -> result``).  Each result must expose the
        attributes ``p1``, ``p2``, ``mean1``, ``mean2``, ``t`` and ``p``.
        The score names are taken from the first bin type.
    file_name
        Destination passed on to ``save_as_pdf_pages``.
    """
    bin_types = list(hypothesis)
    scores = list(hypothesis[bin_types[0]])
    plots = []
    for bin_type, score in product(bin_types, scores):
        mean_name = "Mean: " + score
        # Rows are collected in plain lists and turned into frames once:
        # DataFrame.append was removed in pandas 2.0 and copied the whole
        # frame on every call.
        mean_rows = []
        test_rows = []
        for bin_ in hypothesis[bin_type][score]:
            # each entry is a one-item mapping {bin name: test result}
            bin_name, h = next(iter(bin_.items()))
            # NOTE(review): means and p-values are stored as strings, so the
            # y aesthetic is not numeric -- confirm this is intended.
            for dataset, mean in ((h.p1, h.mean1), (h.p2, h.mean2)):
                mean_rows.append({
                    "Bin": bin_name,
                    'Dataset': dataset,
                    mean_name: str(round(float(mean), 3)),
                })
            p_value = h.p
            test_rows.append({
                "Bin": bin_name,
                't-statistic': str(round(h.t, 3)),
                'p-value': str(p_value),
                '95% Confidence':
                "Significant" if p_value <= 0.05 else "Not Significant",
            })
        df = pd.DataFrame(mean_rows, columns=["Bin", "Dataset", mean_name])
        df2 = pd.DataFrame(
            test_rows,
            columns=["Bin", "t-statistic", 'p-value', '95% Confidence'])
        plots.append(
            (ggplot(df, aes(x='Bin', y=mean_name, fill='Dataset')) +
             geom_col(stat='identity', position='dodge') +
             ggtitle("{0} bin distribution| {1}\nBin's Average Scores".format(
                 bin_type, score))))
        plots.append(
            (ggplot(df2, aes(x='Bin', y='p-value', fill='95% Confidence')) +
             geom_col(stat='identity', width=0.2) + ggtitle(
                 "{0} bin distribution| {1}\nBin's 95% Confidence Level Test".
                 format(bin_type, score)) +
             scale_fill_manual(values={
                 'Significant': "#214517",
                 'Not Significant': '#c62f2d'
             })))
    save_as_pdf_pages(plots, file_name)
Пример #4
0
def round_2_plot():
    """Fetch (if missing) and verify the round 2 data, then save its plot.

    The JSON file is downloaded only when it is not already on disk; its
    checksum is verified on every call.  The figure is written to
    '2019_tacl_trick/auto_fig/round_2_json.pdf'.
    """
    if not os.path.exists(round_2_df_path):
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)
    df = pd.read_json(round_2_df_path)

    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}
    # components are added to the plot in exactly this order
    components = [
        aes(x='char_percent', y='correct', color='Dataset'),
        facet_wrap('Guessing_Model', nrow=1),
        stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.',
                         linetype='None', size=0.5),
        scale_y_continuous(breaks=np.linspace(0, 1, 6)),
        scale_x_continuous(breaks=[0, .5, 1]),
        coord_cartesian(ylim=[0, 0.7]),
        ggtitle('Round 2 Attacks and Models'),
        xlab('Percent of Question Revealed'),
        ylab('Accuracy'),
        theme(strip_text_x=element_text(margin=strip_margin)),
        scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
                           name='Questions'),
    ]
    p = ggplot(df)
    for component in components:
        p = p + component
    p.save('2019_tacl_trick/auto_fig/round_2_json.pdf', width=7.0, height=1.7)
Пример #5
0
def round_1_plot():
    """Load the round 1 CSV and save its accuracy scatter plot.

    The figure is written to '2019_tacl_trick/auto_fig/round_1_csv.pdf'.
    """
    df = pd.read_csv('2019_tacl_trick/data/round_1.csv')
    df['Model'] = df['Model'].astype(
        CategoricalDtype(['DAN', 'RNN', 'IR'], ordered=True))

    # Hack: pad this one dataset name with trailing spaces so that the legend
    # widths are the same across plots.
    df['Dataset'] = df['Dataset'].map(
        lambda name: 'Round 1 - IR Adversarial    '
        if name == 'Round 1 - IR Adversarial' else name)

    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}
    # components are added to the plot in exactly this order
    components = [
        aes(x='x', y='y', color='Dataset'),
        facet_wrap('Model', nrow=1),
        geom_point(size=1.0, shape='o'),
        scale_y_continuous(breaks=np.linspace(0, 1, 6), limits=[0, 0.6]),
        scale_x_continuous(breaks=[0, .5, 1]),
        xlab('Percent of Question Revealed'),
        ylab('Accuracy'),
        ggtitle('Round 1 Attacks and Models'),
        theme(strip_text_x=element_text(margin=strip_margin)),
        scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
                           name='Questions'),
    ]
    p = ggplot(df)
    for component in components:
        p = p + component
    p.save('2019_tacl_trick/auto_fig/round_1_csv.pdf', width=7.0, height=1.7)
def plot_replicate_density(
    df,
    batch,
    plate,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
):
    """Build (and optionally save) the pairwise-correlation density plot.

    Parameters
    ----------
    df
        Data with the columns "pairwise_correlation" (x axis) and
        "replicate_info" (fill).
    batch, plate
        Used for the title, formatted as "<batch>: <plate>".
    output_file_base
        When truthy the figure is handed to ``save_figure``; otherwise
        nothing is saved.
    output_file_extensions
        Extensions passed to ``save_figure``; ``None`` (the default) means
        [".png", ".pdf", ".svg"].
    dpi, height, width
        Passed through to ``save_figure``.

    Returns
    -------
    The plot object.
    """
    if output_file_extensions is None:
        # a fresh list per call instead of one mutable default shared by
        # every call and handed to an external helper
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="pairwise_correlation", fill="replicate_info"))
        + gg.geom_density(alpha=0.3) + gg.scale_fill_manual(
            name="Replicate",
            labels={
                "True": "True",
                "False": "False"
            },
            values=["#B99638", "#2DB898"],
        ) + gg.xlab("Pearson Correlation") + gg.ylab("Density") +
        gg.ggtitle("{}: {}".format(batch, plate)) + gg.theme_bw() + gg.theme(
            title=gg.element_text(size=9),
            axis_text=gg.element_text(size=5),
            axis_title=gg.element_text(size=8),
            legend_text=gg.element_text(size=6),
            legend_title=gg.element_text(size=7),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        ))

    if output_file_base:
        save_figure(density_gg, output_file_base, output_file_extensions, dpi,
                    height, width)

    return density_gg
def event_counts_date(request_disc=None):
    '''
    Plot admission status counts against the day of the year for selected rows

    Rows of the module-level ``df`` are selected with ``request_disc``, a
    dictionary mapping column names to the requested value.  The
    "institution" column is matched with ``str.contains``; every other
    column must be equal to the requested value.  ``None`` (the default)
    or an empty dictionary selects every row.

    All admission dates are moved to the year 1980 so that different years
    share one x axis.  The title is made of the requested columns' values
    taken from the first selected row.

    Returns the plot object.  Raises IndexError when no row matches.
    '''
    if request_disc is None:
        request_disc = {}

    request = np.ones(df.shape[0], dtype=bool)
    for key, wanted in request_disc.items():
        if key == "institution":
            # na=False: rows with a missing institution are excluded instead
            # of putting NaN into the boolean mask
            request = request & df[key].str.contains(wanted, na=False)
        else:
            request = request & (df[key] == wanted)

    # work on an explicit copy so the derived columns are not assigned on a
    # slice of the shared frame
    df_selected = df[request].copy()
    df_selected["date_md"] = df_selected["admission_date"].apply(
        lambda dt: dt.replace(year=1980))
    df_selected["year"] = df_selected["admission_date"].apply(
        lambda dt: dt.year)

    samp = df_selected.iloc[0]
    # one "<value> " chunk per requested column; format() also copes with
    # non-string values
    title = "".join("{} ".format(samp[key]) for key in request_disc)

    gg = p9.ggplot(df_selected)
    gg += p9.aes(x="date_md", y="admission_status")
    gg += p9.scale_x_datetime(date_breaks='10 days',
                              date_labels="%m-%d",
                              limits=np.array([
                                  np.min(df_selected["date_md"]),
                                  pd.to_datetime("1980-4-20")
                              ]))
    gg += p9.geom_count()
    gg += p9.ggtitle(title)
    return gg
Пример #8
0
def density(X, y, sreg, treg):
    """
    Scatter the size vs correlation data and overlay its 2d-density.

    Parameters:
    - - - - -
    X: float, array
        independent variable, plotted as 'Size'
    y: float, array
        dependent variable, plotted as 'Correlation'
    sreg, treg:
        values inserted into the title as '<sreg> --> <treg>'

    Returns:
    - - - -
    g: figure
        density plot
    """

    frame = pd.DataFrame({'Size': X, 'Correlation': y})
    title = 'Dispersion Correlations\n{:} --> {:}'.format(sreg, treg)

    points = geom_point(alpha=0.5, size=0.25)
    contours = geom_density_2d(size=1, color='r')

    return (ggplot(frame, aes('Size', 'Correlation')) + points + contours +
            plotnine.ggtitle(title))
Пример #9
0
    def plot_overlap_duration(self, data, options):
        """Scatter tag duration against tag overlap for overlapping matches.

        Parameters
        ----------
        data
            Mapping whose "matches" entry is a frame with the columns
            ``tag_duration`` and ``tag_overlap``; only rows with a positive
            ``tag_overlap`` are plotted.
        options
            Detector options; ``options["scenario_info"]`` supplies the
            model, database and class named in the title, and the whole
            object is printed in the title as well.

        Returns
        -------
        The plot object.
        """
        all_matches = data["matches"]
        overlapping = all_matches.loc[all_matches.tag_overlap > 0]

        scenario = options["scenario_info"]
        title = (
            "Proportion of tag overlapping with matching event depending on duration "
            "size for model {}, database {}, class {}\n"
            "with detector options {}"
        ).format(scenario["model"], scenario["database"], scenario["class"],
                 options)

        look = theme(
            axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
            plot_title=element_text(weight="bold", size=14, margin={"t": 10, "b": 10}),
            figure_size=(10, 10),
            text=element_text(size=12, weight="bold"),
        )

        return (
            ggplot(data=overlapping, mapping=aes(x="tag_duration", y="tag_overlap"))
            + geom_point()
            + xlab("Tag duration")
            + ylab("Proportion tag overlapping with matching event")
            + theme_classic()
            + look
            + ggtitle(title)
        )
Пример #10
0
def predictionContour(fit, data, y, title, density=51):
    """Tile plot of a fitted model's prediction over the first two columns.

    Parameters
    ----------
    fit
        Fitted model exposing ``predict_proba``; the second probability
        column is plotted and labelled "P(Y=1)".
    data
        Frame whose first two columns span the plotted plane.  It is copied
        and the copy gets a "class" column holding ``y`` as strings.
    y
        Class labels for ``data``.
    title
        Plot title.
    density
        Passed through to ``ggfuntile``.

    Returns
    -------
    The object returned by ``ggfuntile`` with the title added.
    """
    data = data.copy()
    y = y.copy().astype(str)

    def predictor(g, h):
        # one-row frame carrying the first two column names of `data`
        point = pd.DataFrame({data.columns[0]: [g]})
        point[data.columns[1]] = [h]
        return fit.predict_proba(point)[0, 1]

    def half_unit_range(values):
        # (min, max) widened outwards to the nearest multiples of 0.5
        return (0.5 * np.floor(2.0 * min(values)),
                0.5 * np.ceil(2.0 * max(values)))

    data["class"] = y
    x_range = half_unit_range(data.iloc[:, 0])
    y_range = half_unit_range(data.iloc[:, 1])

    out = ggfuntile(predictor,
                    data,
                    xrng=x_range,
                    yrng=y_range,
                    density=density,
                    xlab=data.columns[0],
                    ylab=data.columns[1],
                    zlab="P(Y=1)",
                    breaks=[-np.inf, 0.5, np.inf])
    out += ggtitle(title)
    return out
Пример #11
0
def p(N=3):
    """Yield *N* plots that differ only in their 'i of N' title."""
    base = (ggplot(aes(x='wt', y='mpg', label='name'), data=mtcars) +
            geom_text())
    yield from (base + ggtitle('%d of %d' % (k, N)) for k in range(1, N + 1))
Пример #12
0
def plot_scores(df, title=None, xlab=None, ylab=None):
    """Line plot of the configured score columns with optional labels.

    The x and y columns are ``cfg.SCORE_COLNAME_X`` and
    ``cfg.SCORE_COLNAME_Y``.  ``title``, ``xlab`` and ``ylab`` are only
    applied when they are not ``None``.  Returns the plot object.
    """
    plot = (gg.ggplot(df, gg.aes(x=cfg.SCORE_COLNAME_X,
                                 y=cfg.SCORE_COLNAME_Y)) + gg.geom_line())
    optional_labels = ((title, gg.ggtitle), (xlab, gg.xlab), (ylab, gg.ylab))
    for text, make_label in optional_labels:
        if text is not None:
            plot += make_label(text)
    return plot
Пример #13
0
 def comparison_plot(self,
                     df: pd.DataFrame,
                     xmin=None,
                     xmax=None,
                     bw="normal_reference",
                     **kwargs):
     """Overlaid density curves of the second column, filled by the first.

     ``bw`` is forwarded to ``geom_density``; ``xmin``/``xmax`` go to
     ``self._scale_x``.  Extra keyword arguments are accepted and unused.
     """
     group_col = df.columns[0]
     value_col = df.columns[1]
     palette = scale_fill_brewer(type="qual", palette="Pastel1")
     curves = geom_density(bw=bw, alpha=0.8)
     return (ggplot(df, aes(value_col, fill=group_col)) + palette + curves +
             ggtitle(self.plot_title) + self._scale_x(xmin, xmax) +
             ergo_theme)
 def create(self, file_path: str) -> None:
     """Save the design pattern count bar chart to `file_path`."""
     frame = self._data
     bars = geom_bar(stat="identity", fill="#1e4f79")
     bar_labels = geom_text(va='bottom', size=24, format_string='{:.1%}')
     x_axis = scale_x_discrete(limits=frame["pattern"])
     y_axis = scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
     base_theme = theme_classic(base_size=32, base_family="Helvetica")
     text_theme = theme(text=element_text(size=32),
                        axis_text_x=element_text(rotation=45, ha="right"))

     chart = (ggplot(frame, aes(x="pattern", y="count", label="fraction")) +
              bars + bar_labels + x_axis + y_axis +
              ggtitle("Design Pattern Counts") + xlab("Design Pattern") +
              ylab("Count") + base_theme + text_theme)
     chart.save(file_path, width=24, height=8)
 def create(self, file_path: str) -> None:
     """Save the case study size bar chart to `file_path`."""
     project_counts = [
         "1", "2", "3", "5", "26", "52", "97", "100", "300", "537"
     ]
     bars = geom_bar(fill="#1e4f79")
     bar_labels = geom_text(stat="count", va='bottom', size=24)
     x_axis = scale_x_discrete(limits=project_counts)
     y_axis = scale_y_continuous(breaks=[0, 5, 10], limits=[0, 10])
     base_theme = theme_classic(base_size=28, base_family="Helvetica")
     text_theme = theme(text=element_text(size=28))

     chart = (ggplot(self._data, aes(x="count", label="..count..")) +
              bars + bar_labels + x_axis + y_axis +
              ggtitle("Case Study Sizes") + xlab("Number of Projects") +
              ylab("Number of Case Studies") + base_theme + text_theme)
     chart.save(file_path, width=14, height=7)
Пример #16
0
 def density_plot(
     self,
     df: pd.DataFrame,
     xmin=None,
     xmax=None,
     fill: str = "#fbb4ae",
     bw="normal_reference",
     **kwargs,
 ):
     """Density curve of the first column of `df`, filled with `fill`.

     ``xmin``/``xmax`` go to ``self._scale_x``.  Extra keyword arguments
     are accepted and unused.
     """
     # NOTE(review): `bw` is accepted but never forwarded to geom_density,
     # unlike comparison_plot -- confirm whether that is intentional.
     value_col = df.columns[0]
     curve = geom_density(fill=fill, alpha=0.8)
     return (ggplot(df, aes(value_col)) + curve + ggtitle(self.plot_title) +
             self._scale_x(xmin, xmax) + ergo_theme)
Пример #17
0
def plot_replicate_correlation(
    df,
    batch,
    plate,
    facet_string=None,
    split_samples=False,
    output_file_base=None,
    output_file_extensions=None,
    dpi=500,
    height=4,
    width=5,
    return_plot=False,
):
    """Build (and optionally save) the replicate correlation box plot.

    Parameters
    ----------
    df
        Data with the columns "group_replicate" (x axis and fill) and
        "similarity_metric" (y axis).
    batch, plate
        Used for the title, formatted as "<batch>: <plate>".
    facet_string
        Facet specification; required when ``split_samples`` is true.
    split_samples
        When true the plot is wrapped into facets by ``facet_string``.
    output_file_base
        When truthy the figure is handed to ``save_figure``; otherwise
        nothing is saved.
    output_file_extensions
        Extensions passed to ``save_figure``; ``None`` (the default) means
        [".png", ".pdf", ".svg"].
    dpi, height, width
        Passed through to ``save_figure``.
    return_plot
        When true the plot object is returned, otherwise ``None``.
    """
    if output_file_extensions is None:
        # a fresh list per call instead of one mutable default shared by
        # every call and handed to an external helper
        output_file_extensions = [".png", ".pdf", ".svg"]

    correlation_gg = (
        gg.ggplot(
            df,
            gg.aes(x="group_replicate", y="similarity_metric", fill="group_replicate"),
        )
        + gg.geom_boxplot(
            alpha=0.3, outlier_alpha=0, width=0.8, notchwidth=0.25, fatten=1.5
        )
        + gg.geom_jitter(shape=".", size=0.001, alpha=0.3, width=0.3, height=0)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Replicates")
        + gg.ylab("Pearson Correlation")
        + gg.ggtitle("{}: {}".format(batch, plate))
        + gg.theme_bw()
        + gg.theme(
            subplots_adjust={"wspace": 0.2},
            title=gg.element_text(size=5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=5),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=5),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if split_samples:
        # kept as an assert so callers still see an AssertionError
        assert facet_string, "To split samples, specify a facet_string"
        correlation_gg += gg.facet_wrap(facet_string)

    if output_file_base:
        save_figure(
            correlation_gg, output_file_base, output_file_extensions, dpi, height, width
        )
    if return_plot:
        return correlation_gg
 def create(self, file_path: str) -> None:
     """Save the classes-per-category bar chart to `file_path`."""
     frame = self._data
     bars = geom_bar(stat="identity", fill="#1e4f79")
     bar_labels = geom_text(va='bottom', size=24)
     x_axis = scale_x_discrete(limits=frame["category"])
     y_axis = scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
     base_theme = theme_classic(base_size=32, base_family="Helvetica")
     text_theme = theme(text=element_text(size=32),
                        axis_text_x=element_text(rotation=45, ha="right"))

     chart = (ggplot(frame, aes(x="category", y="count", label="percent")) +
              bars + bar_labels + x_axis + y_axis +
              ggtitle("Classes per Category") + xlab("Category") +
              ylab("Number of Classes") + base_theme + text_theme)
     chart.save(file_path, width=7, height=7)
Пример #19
0
 def plot(self,
          plotDat,
          tag=None,
          log=True,
          by='cell_type',
          data_set=None,
          title=None,
          alpha=.4):
     """Scatter measured against predicted values, coloured per group.

     Each group of the `by` column is labelled with its own Pearson
     correlation and gets a linear fit line.  The overall correlation used
     in the default title is computed on all rows, before the optional
     `data_set` filter is applied.

     Parameters
     ----------
     plotDat
         Frame with the columns ``measured``, ``prediction``, ``sample``,
         ``dataset_name`` and the `by` column; it is copied, not modified.
     tag
         Prefix for the default title.
     log
         Log10 scales on both axes, only when this is exactly ``True``.
     by
         Grouping column.
     data_set
         When truthy, only rows with this ``dataset_name`` are plotted.
     title
         Explicit title; overrides the default 'pearson=...' title.
     alpha
         Point transparency.

     Returns
     -------
     The plot object.
     """
     pDat = plotDat.copy()
     gcorr = pearsonr(pDat.measured, pDat.prediction)[0]
     corrs = pDat.groupby(pDat[by]).apply(
         lambda grp: pearsonr(grp.measured, grp.prediction)[0])
     pDat['corr'] = corrs[pDat[by]].values
     by_str = '{}_pearson'.format(by)
     pDat[by_str] = pDat.apply(
         lambda row: '{} {:.2f}'.format(row[by], corrs[row[by]]), axis=1)
     if data_set:
         pDat = pDat.loc[pDat['dataset_name'] == data_set]

     fit_line = pn.stat_smooth(
         mapping=pn.aes('measured', 'prediction', color=by_str),
         method='lm',
         geom='line',
         alpha=0.5,
         se=False,
         inherit_aes=False)
     pl = (pn.ggplot(pn.aes('measured', 'prediction', color=by_str), pDat) +
           pn.geom_point(alpha=alpha) + fit_line)

     # shape by sample while there are fewer than 10 distinct samples,
     # otherwise by dataset
     if len(pDat['sample'].unique()) < 10:
         shape_col = 'sample'
     else:
         shape_col = 'dataset_name'
     pl = pl + pn.aes(shape=shape_col)

     if log is True:
         pl = pl + pn.scale_x_log10() + pn.scale_y_log10()

     if title is not None:
         heading = title
     elif tag is not None:
         heading = '{} pearson={}'.format(tag, gcorr)
     else:
         heading = 'pearson={}'.format(gcorr)
     return pl + pn.ggtitle(heading)
 def create(self, file_path: str) -> None:
     """Save the faceted QMOOD quality attribute histograms to `file_path`."""
     bars = geom_histogram(bins=100, fill="#1e4f79")
     panels = facet_wrap(facets="variable", scales="free", ncol=3)
     x_axis = scale_x_continuous(trans=asinh_trans(), labels=asinh_labels)
     y_axis = scale_y_continuous(labels=comma_format())
     base_theme = theme_classic(base_size=32, base_family="Helvetica")
     text_theme = theme(text=element_text(size=32),
                        subplots_adjust={
                            "wspace": 0.35,
                            "hspace": 0.35
                        })

     chart = (ggplot(self._data, aes("value")) + bars + panels + x_axis +
              y_axis + ggtitle("Distributions of QMOOD Quality Attributes") +
              xlab("Quality Attribute Value") + ylab("Number of Projects") +
              base_theme + text_theme)
     chart.save(file_path, width=24, height=12)
Пример #21
0
def export_graph(graphname, df, columns, plot):
    """Write the 'not provided' numbers for each column and save the plot.

    For every column a one-line summary is written to
    '../report/src/numbers/np-<graphname>-<column>.tex' and echoed to
    stdout.  The plot is saved with an empty title to
    '../report/src/images/<graphname>.png' and the original plot is
    printed afterwards.
    """
    print('Not provided:')
    for column in columns:
        not_provided = df[filter_notprovided(df[column], keep=True)]
        summary = "{} events, {}".format(*len_and_pct(not_provided, df))
        tex_path = ("../report/src/numbers/" + "np-" + graphname + "-" +
                    column + ".tex")
        with open(tex_path, "w") as handle:
            handle.write(summary)
        print(f'- {column}: {summary}')

    untitled = plot + p9.ggtitle("")
    untitled.save("../report/src/images/" + graphname + ".png", dpi=300)
    print(plot)
 def create(self, file_path: str) -> None:
     """Save the per-category class size histograms to `file_path`."""
     bars = geom_histogram(bins=100, fill="#1e4f79")
     panels = facet_grid(facets="category ~ .", scales='free_y')
     x_axis = scale_x_continuous(trans=asinh_trans(), labels=asinh_labels)
     y_axis = scale_y_continuous(labels=comma_format())
     base_theme = theme_classic(base_size=32, base_family="Helvetica")
     text_theme = theme(text=element_text(size=32),
                        subplots_adjust={"hspace": 0.1})

     chart = (ggplot(self._data, aes("loc")) + bars + panels + x_axis +
              y_axis + ggtitle("Class Sizes") + xlab("Lines of Code") +
              ylab("Number of Classes") + base_theme + text_theme)
     chart.save(file_path, width=8, height=18)
Пример #23
0
def plot_result_stats(results, title):
    """Dodged bar chart of the summary statistics of every result metric.

    ``results.describe()`` is reshaped to one row per (metric, statistic);
    the "count", "min" and "max" statistics are left out.  Each bar is
    labelled with its value rounded to two decimals and the y axis is
    limited to [0, 1].

    Parameters
    ----------
    results
        Frame with one column per metric.
    title
        Plot title.

    Returns
    -------
    The plot object.
    """
    stats = results.describe().unstack().reset_index().rename(columns={
        "level_0": "metric",
        "level_1": "group",
        0: "value"
    })
    # explicit copy: the label column below is then added to an independent
    # frame rather than to a filtered slice (avoids SettingWithCopyWarning)
    stats = stats[~stats["group"].isin(["count", "min", "max"])].copy()
    stats["value_presentation"] = stats["value"].round(2)
    plot = (p9.ggplot(stats) + p9.aes("metric", "value", fill="group") +
            p9.geom_col(position="dodge") + p9.theme_bw() +
            p9.coord_cartesian(ylim=[0, 1.0]) + p9.ggtitle(title) +
            p9.geom_text(p9.aes(label="value_presentation"),
                         position=p9.position_dodge(width=0.9),
                         va="bottom"))
    return plot
 def create(self, file_path: str) -> None:
     """Save the faceted design pattern intensity histograms to `file_path`."""
     bars = geom_histogram(bins=100, fill="#1e4f79")
     panels = facet_wrap(facets="variable", scales="free", ncol=3)
     y_axis = scale_y_continuous(labels=comma_format())
     base_theme = theme_classic(base_size=32, base_family="Helvetica")
     text_theme = theme(text=element_text(size=32),
                        axis_title_y=element_text(margin={"r": 40}),
                        subplots_adjust={
                            "wspace": 0.3,
                            "hspace": 0.5
                        })

     chart = (ggplot(self._data, aes("value")) + bars + panels + xlim(0, 1) +
              y_axis + ggtitle("Intensity of Design Pattern Use") +
              xlab("Percentage of Classes Participating in Design Pattern") +
              ylab("Number of Projects") + base_theme + text_theme)
     chart.save(file_path, width=24, height=24)
Пример #25
0
    def plot_overlap_duration_bar(self, data, options):
        """Bar chart of binned tag durations, filled by binned tag overlap.

        Parameters
        ----------
        data
            Mapping whose "matches" entry is a frame with the columns
            ``tag_duration`` and ``tag_overlap``; only rows with a positive
            ``tag_overlap`` are used.  The frame itself is not modified.
        options
            Detector options; ``options["scenario_info"]`` supplies the
            model, database and class named in the title, and the whole
            object is printed in the title as well.

        Returns
        -------
        The plot object.

        Side effect: rows with ``tag_overlap`` below 0.3 are written to
        "small_overlap.csv" in the current working directory.
        """
        matches = data["matches"]
        # explicit copy: the bin columns below are then added to an
        # independent frame rather than to a slice of the caller's data
        matches = matches.loc[matches.tag_overlap > 0].copy()
        matches["tag_overlap_bin"] = pd.cut(
            matches.tag_overlap, [0, 0.25, 0.5, 0.75, 1]
        )
        matches["tag_duration_bin"] = pd.cut(
            matches.tag_duration, [0, 0.25, 0.5, 0.75, 1, 1.5, 2, float("inf")]
        )

        # NOTE(review): looks like a debugging export -- confirm whether it
        # is still needed
        matches.loc[matches.tag_overlap < 0.3].to_csv("small_overlap.csv")

        plt = ggplot(
            data=matches, mapping=aes(x="tag_duration_bin", fill="tag_overlap_bin",),
        )
        plt = (
            plt
            + geom_bar()
            + xlab("Tag duration")
            + ylab("Proportion tag overlapping with matching event")
            + theme_classic()
            + theme(
                axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
                plot_title=element_text(
                    weight="bold", size=14, margin={"t": 10, "b": 10}
                ),
                figure_size=(10, 10),
                text=element_text(size=12, weight="bold"),
            )
            + ggtitle(
                (
                    "Proportion of tag overlapping with matching event depending on duration "
                    + "size for model {}, database {}, class {}\n"
                    + "with detector options {}"
                ).format(
                    options["scenario_info"]["model"],
                    options["scenario_info"]["database"],
                    options["scenario_info"]["class"],
                    options,
                )
            )
        )

        return plt
Пример #26
0
def plot_dist_with_ci(dist):
    """Histogram of the estimates with their 2.5% and 97.5% quantiles marked.

    Parameters
    ----------
    dist
        Frame with an ``estimates`` column.

    Returns
    -------
    The plot object.  Its title shows the mean of the estimates followed by
    the two quantiles in parentheses.
    """
    estimates = dist.estimates
    # Both quantiles come from the plotted column and are computed once, so
    # the marker lines always agree with the numbers in the title (the
    # whole-frame ``dist.quantile`` would cover every column of ``dist``).
    lower = estimates.quantile(0.025)
    upper = estimates.quantile(0.975)

    return (pn.ggplot(dist, pn.aes(x='estimates')) +
            pn.geom_histogram(bins=25) + pn.geom_vline(
                xintercept=lower,
                color="#FF5500",
                size=2,
                linetype='dotted',
            ) + pn.geom_vline(
                xintercept=upper,
                color="#FF5500",
                size=2,
                linetype='dotted',
            ) + pn.ggtitle("${0:,.0f} ({1:,.0f}, {2:,.0f})".format(
                np.mean(estimates),
                lower,
                upper,
            )))
Пример #27
0
    def create(self, file_path: str) -> None:
        """Save one box plot per metric to '<file_path>.<metric>.pdf'.

        Each plot shows the metric's values per category, without outlier
        markers, on a y range derived from the 98th and 2nd percentiles.
        """
        for metric in self._data["metric"].unique():
            data = self._data[self._data["metric"] == metric]
            pct_98, pct_2 = np.percentile(data["value"], [98, 2])

            # NOTE(review): the y limits are (98th percentile * 0.8,
            # 2nd percentile * 1.2), i.e. the larger percentile comes
            # first -- confirm this ordering is intended.
            window = coord_cartesian(ylim=(pct_98 * 0.8, pct_2 * 1.2))
            boxes = geom_boxplot(outlier_shape="")
            base_theme = theme_classic(base_size=28, base_family="Helvetica")

            chart = (ggplot(data, aes(x="category", y="value")) + boxes +
                     window + ggtitle(metric) + xlab("Category") +
                     ylab("Value") + base_theme)
            chart.save(f"{file_path}.{metric}.pdf", width=24, height=24)
Пример #28
0
def plot_replicate_density(
    df,
    batch,
    plate,
    cutoff,
    percent_strong,
    output_file_base=None,
    output_file_extensions=None,
    dpi=300,
    height=1.5,
    width=2,
    return_plot=False,
):
    """Build (and optionally save) the similarity density plot with a cutoff.

    Parameters
    ----------
    df
        Data with the columns "similarity_metric" (x axis) and
        "group_replicate" (fill).
    batch, plate
        Shown in the first line of the title.
    cutoff
        Position of the dashed red vertical line.
    percent_strong
        Fraction shown in the title as a percentage, i.e. multiplied by 100
        and rounded to two decimals.
    output_file_base
        When truthy the figure is handed to ``save_figure``; otherwise
        nothing is saved.
    output_file_extensions
        Extensions passed to ``save_figure``; ``None`` (the default) means
        [".png", ".pdf", ".svg"].
    dpi, height, width
        Passed through to ``save_figure``.
    return_plot
        When true the plot object is returned, otherwise ``None``.
    """
    if output_file_extensions is None:
        # a fresh list per call instead of one mutable default shared by
        # every call and handed to an external helper
        output_file_extensions = [".png", ".pdf", ".svg"]

    density_gg = (
        gg.ggplot(df, gg.aes(x="similarity_metric", fill="group_replicate"))
        + gg.geom_density(alpha=0.3)
        + gg.scale_fill_manual(
            name="Replicate",
            labels={"True": "True", "False": "False"},
            values=["#B99638", "#2DB898"],
        )
        + gg.xlab("Pearson Correlation")
        + gg.ylab("Density")
        + gg.geom_vline(xintercept=cutoff, color="red", linetype="dashed")
        + gg.ggtitle(
            f"{batch}; Plate: {plate}\n\nPercent Replicating: {np.round(percent_strong * 100, 2)}%"
        )
        + gg.theme_bw()
        + gg.theme(
            title=gg.element_text(size=3.5),
            axis_text=gg.element_text(size=4),
            axis_title=gg.element_text(size=4),
            legend_text=gg.element_text(size=4),
            legend_title=gg.element_text(size=4),
            strip_text=gg.element_text(size=4, color="black"),
            strip_background=gg.element_rect(colour="black", fill="#fdfff4"),
        )
    )

    if output_file_base:
        save_figure(
            density_gg, output_file_base, output_file_extensions, dpi, height, width
        )

    if return_plot:
        return density_gg
def image_histogram():
    """Show an image with its grayscale and equalized versions and histograms.

    Reads "image.jpg" from the working directory, displays the original,
    the grayscale and the histogram-equalized grayscale image in three
    OpenCV windows, and plots the two grayscale histograms on top of each
    other.  After ``plt.show()`` the function polls the keyboard until 'q'
    is pressed and then closes all OpenCV windows.

    Raises
    ------
    FileNotFoundError
        If "image.jpg" cannot be read.
    """
    # read first: cv2.imread signals failure by returning None, which would
    # otherwise only surface as an opaque error in cvtColor
    image = cv2.imread(r"image.jpg")
    if image is None:
        raise FileNotFoundError("image.jpg could not be read")
    image_bw = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    image_bw_eq = cv2.equalizeHist(image_bw)

    # create windows
    cv2.namedWindow('image', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw', cv2.WINDOW_NORMAL)
    cv2.namedWindow('image_bw_eq', cv2.WINDOW_NORMAL)

    # display images
    cv2.imshow('image', image)
    cv2.imshow('image_bw', image_bw)
    cv2.imshow('image_bw_eq', image_bw_eq)

    # calculate histograms; the upper bound of the range is exclusive, so
    # [0, 256] is needed for 256 unit-wide bins that include intensity 255
    hist_bw = cv2.calcHist([image_bw], [0], None, [256], [0, 256])
    hist_bw_eq = cv2.calcHist([image_bw_eq], [0], None, [256], [0, 256])
    np_hist_x = np.arange(len(hist_bw))
    d = {
        'np_hist_x': np_hist_x,
        'hist_bw': hist_bw.flatten(),
        'hist_bw_eq': hist_bw_eq.flatten()
    }
    df = pd.DataFrame(data=d)

    # plot both histograms semi-transparently in one chart
    pn_handle = pn.ggplot(df) + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw'), color=None, fill='red', alpha=0.5) + pn.ylab('occurences') \
                              + pn.geom_col(pn.aes(x='np_hist_x', y='hist_bw_eq'), color=None, fill='green', alpha=0.5) \
                              + pn.ggtitle('Histograms of bw images')
    pn_handle.draw()
    plt.show()

    # keep the OpenCV windows responsive until 'q' is pressed
    while True:
        pressed_key = cv2.waitKey(16)
        if pressed_key == ord('q'):
            break

    # cleanup opencv
    cv2.destroyAllWindows()
Пример #30
0
def main(argv: List[str]) -> None:
    """Simulate ability score rolls, print statistics and save a bar chart.

    Parameters
    ----------
    argv
        Command line arguments without the program name: the positional
        ``roll_rule`` plus the optional ``--num_iterations``, ``--seed``
        and ``--plot_file``.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("roll_rule", type=RollRule, choices=list(RollRule))
    parser.add_argument("--num_iterations", type=int, default=10000)
    parser.add_argument("--seed", type=int, default=None)
    parser.add_argument("--plot_file", default="ability_roll_distribution.png")

    args = parser.parse_args(argv)

    if args.seed is not None:
        random.seed(args.seed)

    # Run the simulation and process the data
    roll_counts = simulate(args.roll_rule, args.num_iterations)
    data = process_data(roll_counts)

    # Calculate statistics; "percent" is divided by 100 to get weights
    mean = sum(data["value"] * data["percent"] / 100.0)
    # idxmax() returns an index *label*, so it has to be looked up with
    # .loc; positional .iloc is only right for a default 0..n-1 index
    mode = data.loc[data["count"].idxmax()]["value"]
    stddev = math.sqrt(
        sum(data["percent"] / 100.0 * (data["value"] - mean)**2.0))
    skewness = pearson_first_skewness(mean, mode, stddev)

    # Print out result information
    print(data)
    print()
    print("Mean:", mean)
    print("Mode:", mode)
    print("Standard deviation:", stddev)
    print("Skewness:", skewness)

    # Plot the data, with a vertical line marking the mean
    plot = (plt9.ggplot(data, plt9.aes("value", "percent")) +
            plt9.geom_bar(stat="identity") +
            plt9.geom_vline(xintercept=mean, color="black") +
            plt9.xlim(0, 21) + plt9.ylab("Chance (%)") +
            plt9.xlab("Ability Score") +
            plt9.ggtitle("Ability Score Distribution ({} iterations)".format(
                args.num_iterations)))

    plot.save(args.plot_file, dpi=300)
    print("Wrote plot image to:", args.plot_file)
Пример #31
0
def main():
    """Build every plot of the series and hand each one to ``save_both``.

    Sets the matplotlib mathtext fontset to ``'cm'``, silences three warning
    patterns, then builds a sequence of plots. Each plot is passed to
    ``save_both`` together with the output name given as its second argument.
    """
    mpl.rc('mathtext', fontset='cm')

    warnings.filterwarnings('ignore',
                            r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    def diagonal():
        # Dashed grey reference line y = x, shared by several plots below.
        return gg.geom_abline(slope=1, intercept=0,
                              linetype='dashed', color='grey')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'P_f')
        + titles('P_f(O_b, O_l)')
        + limits((1, 10))
        + diagonal()
        + gg.geom_line(),
        'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(
        my_plot(df, 'O_b', 'σ', 'P_f')
        + titles('P_f(O_b, σ)')
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'P_q')
        + titles('P_q(O_b, O_l)')
        + limits((1, 10))
        + diagonal()
        + gg.geom_line(),
        'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(
        my_plot(df, 'O_b', 'σ', 'P_q')
        + titles('P_q(O_b, σ)')
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'Opr')
        + titles("O'(O_b, O_l)")
        + limits((1, 10), (1, 10))
        + gg.geom_line()
        + diagonal(),
        'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(
        my_plot(df, 'O_b', 'σ', 'Opr')
        + titles("O'(O_b, σ)")
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'Opr_Ob_σ')

    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
            .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(
        my_plot(df, 'Opr', 'Pf')
        + titles("P_f(O')")
        + labs("O'", 'P_f')
        + limits((1, 20), (0, 1),
                 xbreaks=np.linspace(2, 20, 10),
                 ybreaks=np.linspace(0, 1, 11))
        + gg.geom_line()
        + gg.geom_hline(yintercept=C, linetype='dashed', color='grey'),
        'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(
        my_plot(df, 'O_b', 'σ', 'σpr')
        + titles("σ'(O_b, σ)")
        + limits((1, 10), (0, 5))
        + gg.geom_line(),
        'σpr_Ob_σ')

    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
            .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(
        my_plot(df, 'σpr', 'Pq')
        + titles("P_q(σ')")
        + labs("σ'", 'P_q')
        + limits((0, 20), (-1, 0),
                 xbreaks=np.linspace(0, 20, 11),
                 ybreaks=np.linspace(-1, 0, 11))
        + gg.geom_line(),
        'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Free bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line()
        + diagonal(),
        'liab_Ob_Ol_free')

    # NOTE(review): this σ plot is fed by liab_Ob_Ol_free and uses the same
    # (1, 10) y-limits as the O_l plot above -- confirm that is intended.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Free bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line(),
        'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Qualifying bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line()
        + diagonal(),
        'liab_Ob_Ol_qual')

    # NOTE(review): as with the free-bet σ plot, this reuses the O_l data
    # source (liab_Ob_Ol_qual) and limits -- confirm that is intended.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(
        my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
        + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                 mathrm('Qualifying bet', dollars=False))
        + limits((1,20), (1, 10))
        + gg.geom_line(),
        'liab_Ob_σ_qual')

    # Final plot: shaded areas for one free-bet and one qualifying-bet curve.
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # Positions of the dashed guide lines, read from the curves themselves.
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8], 'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')

        # text alignment can't be specified in an aes, so each label gets its
        # own layer; .loc slices are label-based and inclusive, selecting one
        # row each on the default integer index (.ix was removed from pandas)
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')

        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ'),
        'Px_shapes')
Пример #32
0
def titles(t, *s):
    """Return a ``gg.ggtitle`` holding a math title plus hacky subtitles.

    The title ``t`` is wrapped in ``$...$``. Each subtitle in ``s`` is wrapped
    as ``${}_{...}$`` (a subscript on an empty group) and placed on its own
    line below the title. When no subtitle is given, the module-level
    ``commission_string`` is used as the only subtitle.
    """
    subtitles = s if s else [commission_string]
    lines = ['$%s$' % (t,)]
    for subtitle in subtitles:
        lines.append('${}_{%s}$' % (subtitle,))
    return gg.ggtitle('\n'.join(lines))
Пример #33
0
    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Build the accuracy versus percent-of-question-revealed plot.

        Args:
            expo: When true, build the plot faceted by ``Guessing_Model``,
                which can include curves from the human gameplay file.
                Otherwise return a plain smoothed plot of
                ``self.char_plot_df``.
            no_models: Only used when ``expo`` is true. Plot just the human
                gameplay points and skip the model data.
            columns: Only used when ``expo`` is true. Lay the facets out in
                one column instead of one row.

        Returns:
            The assembled plot object.

        Raises:
            ValueError: If ``no_models`` is set but no human gameplay data is
                available (file missing or ``self.no_humans`` set).
        """
        human_path = 'data/external/all_human_gameplay.json'

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # This branch plots self.char_plot_df directly, so that is the
                # frame to persist; no local ``df`` exists on this path.
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        def accuracy_frame(correct_positions, wrong_positions, dataset, model):
            # Sort all answer positions and accumulate the correct ones, so
            # 'correct' is the fraction of all answers that were correct at or
            # before each position.
            positions = np.array(correct_positions + wrong_positions)
            outcomes = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_outcomes = outcomes[order]
            frame = pd.DataFrame({
                'correct': sorted_outcomes.cumsum() / sorted_outcomes.shape[0],
                'char_percent': positions[order],
            })
            frame['Dataset'] = dataset
            frame['Guessing_Model'] = model
            return frame

        # Evaluated once so that human_df exists exactly when it is used.
        has_humans = os.path.exists(human_path) and not self.no_humans
        human_df = None
        if has_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)

            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                model_label = f' {name}'
                gameplay = all_gameplay[event]

                # The 'live' event contributes no 'Regular Test' frame.
                if event != 'live':
                    frames.append(accuracy_frame(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test', model_label))

                frames.append(accuracy_frame(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial', model_label))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(accuracy_frame(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial', model_label))

            human_df = pd.concat(frames)
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
            dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    'no_models=True requires human gameplay data, but none is available'
                )
            p = ggplot(human_df) + geom_point(shape='.')
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if has_humans:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                #legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p