Пример #1
0
def plot_pointplot(plot_df, y_axis_label="", use_log10=False, limits=None):
    """
    Plots the pointplot: one horizontal point-range per lemma (odds ratio
    with its lower/upper bound), ordered by ascending odds ratio, with a
    dashed grey reference line at an odds ratio of 1.

    Arguments:
        plot_df - the dataframe that contains the odds ratio and lemmas
                  (columns "lemma", "odds_ratio", "lower_odds", "upper_odds")
        y_axis_label - the label for the y axis
        use_log10 - use log10 for the y axis?
        limits - [lower, upper] limits of the continuous y axis; only used
                 when use_log10 is False. Defaults to [0, 3.2].
    Returns:
        the plotnine ggplot object
    """
    # Build the default per call instead of sharing one mutable list object
    # across every invocation of the function.
    if limits is None:
        limits = [0, 3.2]

    # Lemmas are laid out along the (flipped) x axis by ascending odds ratio.
    lemma_order = plot_df.sort_values(
        "odds_ratio", ascending=True).lemma.tolist()
    y_scale = (p9.scale_y_log10() if use_log10
               else p9.scale_y_continuous(limits=limits))

    graph = (
        p9.ggplot(plot_df, p9.aes(x="lemma", y="odds_ratio")) +
        p9.geom_pointrange(p9.aes(ymin="lower_odds", ymax="upper_odds"),
                           position=p9.position_dodge(width=1),
                           size=0.3,
                           color="#253494") +
        p9.scale_x_discrete(limits=lemma_order) +
        y_scale +
        # Reference line: an odds ratio of 1
        p9.geom_hline(p9.aes(yintercept=1), linetype='--', color='grey') +
        p9.coord_flip() + p9.theme_seaborn(
            context='paper', style="ticks", font_scale=1, font='Arial') +
        p9.theme(
            # 640 x 480
            figure_size=(6.66, 5),
            panel_grid_minor=p9.element_blank(),
            axis_title=p9.element_text(size=12),
            axis_text_x=p9.element_text(size=10)) +
        p9.labs(x=None, y=y_axis_label))
    return graph
Пример #2
0
 def __init__(self):
     """Set up a minimal theme in 'Open Sans' with custom axis titles and panel border."""
     pn.theme_minimal.__init__(self, base_family='Open Sans')
     # Overrides layered on top of the minimal base theme
     overrides = pn.theme(
         axis_title=pn.element_text(size=10),
         axis_title_y=pn.element_text(margin={'r': 12}),
         panel_border=pn.element_rect(color='gainsboro', size=1, fill=None),
     )
     self.add_theme(overrides, inplace=True)
Пример #3
0
def plot_fundamentals(df, stock) -> str:
    """
    Plot one line chart per fundamental indicator, stacked vertically.

    Arguments:
        df - DataFrame indexed by fetch date that contains every column
             listed in columns_to_report below
        stock - not used by this function; kept for the existing call
                signature
    Returns:
        whatever plot_as_inline_html_data() produces for the finished plot

    Raises AssertionError when df is not a DataFrame or a required column
    is missing. The caller's DataFrame is left unmodified.
    """
    assert isinstance(df, pd.DataFrame)
    columns_to_report = ["pe", "eps", "annual_dividend_yield", "volume",
                         "last_price", "change_in_percent_cumulative",
                         "change_price", "market_cap", "number_of_shares"]
    missing = [column for column in columns_to_report
               if column not in df.columns]
    assert not missing, "missing columns: {}".format(missing)

    # Work on a copy: the rescaling below used to be applied to the caller's
    # frame in place, so plotting the same frame twice scaled it twice.
    df = df.copy()
    df["volume"] = df["last_price"] * df["volume"] / 1000000  # again, express as $(M)
    df["market_cap"] /= 1000 * 1000
    df["number_of_shares"] /= 1000 * 1000
    df["fetch_date"] = df.index

    # Long format: one row per (fetch_date, indicator) pair
    plot_df = pd.melt(
        df,
        id_vars="fetch_date",
        value_vars=columns_to_report,
        var_name="indicator",
        value_name="value",
    )
    plot_df["value"] = pd.to_numeric(plot_df["value"])
    plot_df["fetch_date"] = pd.to_datetime(plot_df["fetch_date"])

    plot = (
        p9.ggplot(plot_df, p9.aes("fetch_date", "value", color="indicator"))
        + p9.geom_line(size=1.5, show_legend=False)
        # one facet row per indicator, each with its own y scale
        + p9.facet_wrap("~ indicator", nrow=len(columns_to_report), ncol=1, scales="free_y")
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                   axis_text_y=p9.element_text(size=7),
                   figure_size=(8, len(columns_to_report)))
        + p9.xlab("")
        + p9.ylab("")
    )
    return plot_as_inline_html_data(plot)
Пример #4
0
def plot_series(
        df,
        x=None,
        y=None,
        tick_text_size=6,
        line_size=1.5,
        y_axis_label="Point score",
        x_axis_label="",
        color="stock",
        use_smooth_line=False
):
    """
    Draw the y column of df against the x column as a line (or a smoothed
    line when use_smooth_line is set), optionally coloured by the `color`
    column, with the legend hidden.

    Returns whatever plot_as_inline_html_data() produces for the plot.
    Inputs are checked with assertions before any plotting happens.
    """
    assert len(df) > 0
    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None

    # The colour aesthetic is only mapped when a colour column was given
    mapping = dict(x=x, y=y)
    if color:
        mapping['color'] = color

    base = p9.ggplot(df, p9.aes(**mapping))
    axis_labels = p9.labs(x=x_axis_label, y=y_axis_label)
    tick_theme = p9.theme(
        axis_text_x=p9.element_text(angle=30, size=tick_text_size),
        axis_text_y=p9.element_text(size=tick_text_size),
        legend_position="none",
    )
    if use_smooth_line:
        line_layer = p9.geom_smooth(size=line_size)
    else:
        line_layer = p9.geom_line(size=line_size)

    plot = base + axis_labels + tick_theme + line_layer
    return plot_as_inline_html_data(plot)
Пример #5
0
def plot_bargraph(count_plot_df, plot_df):
    """
    Plots the bargraph: horizontal bars of lemma counts, one facet per
    repository, with lemmas ordered by ascending odds ratio.

    Arguments:
        count_plot_df - The dataframe that contains lemma counts
        plot_df - the dataframe that contains the odds ratio and lemmas
    """
    # Lemma ordering comes from the odds-ratio table, not from the counts
    lemma_order = plot_df.sort_values(
        "odds_ratio", ascending=True).lemma.tolist()
    counts = count_plot_df.astype({"count": int})

    sizing = p9.theme(
        # 640 x 480
        figure_size=(6.66, 5),
        strip_background=p9.element_rect(fill="white"),
        strip_text=p9.element_text(size=12),
        axis_title=p9.element_text(size=12),
        axis_text_x=p9.element_text(size=10),
    )

    graph = p9.ggplot(counts, p9.aes(x="lemma", y="count"))
    graph = graph + p9.geom_col(
        position=p9.position_dodge(width=0.5), fill="#253494")
    graph = graph + p9.coord_flip()
    graph = graph + p9.facet_wrap("repository", scales='free_x')
    graph = graph + p9.scale_x_discrete(limits=lemma_order)
    graph = graph + p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
    graph = graph + p9.labs(x=None)
    graph = graph + p9.theme_seaborn(
        context='paper', style="ticks", font="Arial", font_scale=0.95)
    graph = graph + sizing
    return graph
Пример #6
0
    def plot_overlap_duration(self, data, options):
        """Scatter tag overlap against tag duration for matches with a positive overlap.

        `data["matches"]` supplies the points; the title is filled in from
        `options["scenario_info"]` (model, database, class) and from the
        full `options` mapping. Returns the plot object.
        """
        all_matches = data["matches"]
        overlapping = all_matches.loc[all_matches.tag_overlap > 0]

        scenario = options["scenario_info"]
        title_template = (
            "Proportion of tag overlapping with matching event depending on duration "
            + "size for model {}, database {}, class {}\n"
            + "with detector options {}"
        )
        title = title_template.format(
            scenario["model"], scenario["database"], scenario["class"], options,
        )

        styling = theme(
            axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
            plot_title=element_text(
                weight="bold", size=14, margin={"t": 10, "b": 10}
            ),
            figure_size=(10, 10),
            text=element_text(size=12, weight="bold"),
        )

        chart = ggplot(data=overlapping, mapping=aes(x="tag_duration", y="tag_overlap"))
        chart = chart + geom_point()
        chart = chart + xlab("Tag duration")
        chart = chart + ylab("Proportion tag overlapping with matching event")
        chart = chart + theme_classic()
        chart = chart + styling
        chart = chart + ggtitle(title)
        return chart
Пример #7
0
def plot_downstream(clwe, table, output, ylim):
    """Save a dodged bar chart of accuracy per language, filled by refinement.

    Reads the CSV found via data_file(table), keeps the rows whose `clwe`
    column equals `clwe`, and writes the chart to output_file(output).
    `ylim` is passed to coord_cartesian as the visible y range.
    """
    refine_levels = ['Original', '+retrofit', '+synthetic']
    language_levels = ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']

    results = pd.read_csv(data_file(table))
    results = results[results.clwe == clwe]
    # Fixed category order for both the legend and the x axis
    results = results.assign(
        refine=pd.Categorical(results['refine'], refine_levels),
        language=pd.Categorical(results['language'], language_levels),
    )

    chart = (
        p9.ggplot(results, p9.aes(x='language', y='accuracy', fill='refine'))
        + p9.geom_bar(position='dodge', stat='identity', width=.8)
        + p9.coord_cartesian(ylim=ylim)
        + p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
        + p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
        + p9.theme(
            plot_background=p9.element_rect(fill='white'),
            panel_grid_major_y=p9.element_line(),
            axis_text_x=p9.element_text(margin={'t': 10}),
            axis_text_y=p9.element_text(margin={'r': 8}),
            legend_position=(.7, .9),
            legend_direction='horizontal',
            legend_title=p9.element_blank(),
            legend_text=p9.element_text(size=FONT_SIZE),
            legend_box_margin=0,
            figure_size=(12, 3)
        )
    )
    chart.save(filename=output_file(output))
Пример #8
0
def plot_predict(forecast):
    """Plot observed values, the forecast line and its uncertainty band, and save a PNG.

    `forecast` provides the columns ds, y, yhat, yhat_lower and yhat_upper.
    The image is written as predict_pressure_chart.png into the `png`
    directory next to this module. Returns the plot object.
    """
    observed_points = geom_point(colour='blue', alpha=0.3, na_rm=True)
    observed_line = geom_line(colour='blue', na_rm=True)
    predicted_line = geom_line(
        data=forecast, mapping=aes(x='ds', y='yhat'), colour='red')
    uncertainty_band = geom_ribbon(
        data=forecast,
        mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
        fill='blue',
        alpha=0.1)
    tick_styling = theme(
        axis_text_x=element_text(angle=45, hjust=1, face='bold', color='black'),
        axis_text_y=element_text(face='bold', colour='black'))

    chart = ggplot(data=forecast, mapping=aes(x='ds', y='y'))
    chart = chart + observed_points + observed_line
    chart = chart + predicted_line + uncertainty_band
    chart = chart + scale_x_datetime(breaks='1 days',
                                     date_labels='%y-%m-%d %H:%M')
    chart = chart + xlab('Time') + ylab('Pressure') + theme_bw()
    chart = chart + tick_styling

    # Output goes into the "png" folder beside this source file
    module_dir = os.path.abspath(os.path.dirname(__file__))
    chart.save(filename='predict_pressure_chart.png',
               path=os.path.join(module_dir, 'png'),
               width=8,
               height=6,
               units='in',
               dpi=326,
               verbose=False)
    return chart
Пример #9
0
def medicine(Data):
    """Plot the dates on which each medicine was recorded and save it as a JPEG.

    Rows of Data whose Group is 'me' or 'ma' and whose Name is not missing
    are drawn as red points (x = Date, y = Name) and written to
    pdf/iteration/Medicine.jpeg. The plot is only produced when more than
    five such rows exist.

    Arguments:
        Data - dataframe with at least the columns Group, Date and Name

    Returns None. Progress and failures are reported on stdout; exceptions
    raised while filtering or plotting are caught and printed, not
    propagated.
    """
    print('======= Creating medicine =======')

    try:
        # Filter medicine; copy so the Date conversion below does not write
        # into a slice of the caller's dataframe.
        medicine = Data[(Data.Group == 'me') | (Data.Group == 'ma')].copy()
        medicine['Date'] = pd.to_datetime(medicine['Date'])

        # Only rows that name a medicine can be plotted. The previous
        # version merged in a full calendar of days first, but every row
        # that merge added had a missing Name and was dropped again here.
        medicine = medicine[medicine.Name.notna()]

        if len(medicine) > 5:
            # Creating and saving Medicine plot
            f_tl1 = (
                p9.ggplot(data=medicine,
                          mapping=p9.aes(x='Date', y='Name'))
                + p9.geom_point(color='red', size=3)
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=18),
                           axis_title=p9.element_text(size=18, face='bold'))
                + p9.labs(title='', x='', y='')
            )
            # Saving lives inside the branch that builds the plot, so a
            # small dataset no longer fails on an undefined variable.
            f_tl1.save(filename='Medicine.jpeg',
                       path="pdf/iteration/",
                       width=25, height=5,
                       dpi=320)
        else:
            print('Plot not created; no data found.')

    except Exception as error:
        # Best effort: report the failure together with its cause and carry on
        print("Medicical graph failed")
        print(error)

    print('=================================medicine DONE =============================')
    return None
Пример #10
0
def plot_significance_vs_ranking(
    summary_df, method_name, x_label, output_figure_filename
):
    """Scatter a method's test statistic against the percentile rank and save it as SVG.

    Points whose percentile rank is above 0.9 are redrawn in red and
    labelled with their index value. The figure is printed and then
    written to `output_figure_filename`.
    """
    # Column holding this method's statistic for the real data
    statistic_column = method_stats_dict[method_name] + " (Real)"
    percentile_rank = summary_df["Rank (simulated)"].rank(pct=True).values

    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[statistic_column].values,
            "Percentile rank": percentile_rank,
        },
        index=summary_df.index,
    )

    top_ranked = plot_df[plot_df["Percentile rank"] > 0.9]
    # Empty label for everything outside the top of the ranking
    point_labels = [
        name if plot_df.loc[name, "Percentile rank"] > 0.9 else ""
        for name in plot_df.index
    ]
    sans = "sans-serif"

    fig = pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
    fig += pn.geom_point()
    fig += pn.geom_point(
        top_ranked,
        pn.aes(x="Test statistic", y="Percentile rank"),
        color="red",
    )
    fig += pn.geom_text(
        pn.aes(label=point_labels),
        ha="left",
        va="top",
        size=5,
    )
    fig += pn.labs(
        x=x_label,
        y="Percentile of ranking",
        title=f"{method_name} pathway statistics vs ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family=sans, size=15),
        legend_text=pn.element_text(family=sans, size=12),
        plot_title=pn.element_text(family=sans, size=15),
        axis_text=pn.element_text(family=sans, size=12),
        axis_title=pn.element_text(family=sans, size=15),
    )

    print(fig)

    # Save figure
    save_options = dict(
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
    fig.save(output_figure_filename, **save_options)
Пример #11
0
def duration_TL(Data):
    """Plot a loess-smoothed timeline of symptom durations and save it as TL_2.jpeg.

    Uses the rows of Data with Group == "sy" and Duration < 180. Days
    without a record between the first and last date are added as empty
    rows before plotting. The image is written to pdf/iteration/.

    Arguments:
        Data - dataframe with at least the columns Group, Date and Duration

    Returns None. When the number of missing durations plus 10 reaches the
    number of rows, only a warning is printed and nothing else happens.
    """
    print('======= Creating duration_TL =======')

    # Too few usable durations: warn and stop
    n_missing = Data.Duration.isna().sum()
    if n_missing + 10 >= len(Data):
        print("WARNING: All values for Duration are NA's")
        return None

    # Filter Symptomes and Correct Durations; copy so the Date conversion
    # below does not assign into a slice of the caller's dataframe.
    Symptomes = Data[(Data.Group == "sy") & (Data.Duration < 180)].copy()
    Symptomes['Date'] = pd.to_datetime(Symptomes['Date'])

    if len(Symptomes) == 0:
        print('No duration for TL_2')
    else:
        # Setting data with missing times: one row per day in the range
        sdate = Symptomes["Date"].min()   # start date
        edate = Symptomes["Date"].max()   # end date
        delta = edate - sdate             # as timedelta
        DF = pd.DataFrame({
            'Date': pd.date_range(start=sdate, periods=delta.days + 1, freq='D')
        })
        data_with_missing_times = pd.merge(DF, Symptomes, on='Date', how='outer')
        data_with_missing_times['Date'] = pd.to_datetime(data_with_missing_times['Date'])

        # Wider tick spacing for longer timelines
        if delta.days > 1825:
            datebreaks = '18 months'
        elif delta.days > 1095:
            datebreaks = '12 months'
        else:
            datebreaks = '6 months'

        plot = (
            p9.ggplot(data=data_with_missing_times,
                      mapping=p9.aes(x='Date', y='Duration'))
            + p9.geom_smooth(color='red', size=5, method="loess", se=False)
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=33),
                       axis_title=p9.element_text(size=33, face='bold'))
            + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
            + p9.labs(x='', y='')
        )
        # The merged frame always has at least one row here, so the plot is
        # saved unconditionally.
        plot.save(filename='TL_2.jpeg',
                  path="pdf/iteration/",
                  width=25, height=5,
                  dpi=320)

    print('=================================duration_TL DONE =============================')
    return None
Пример #12
0
def frequency_TL(Data):
    """Plot the number of records per calendar day as red bars and save TL_4.jpeg.

    A `date_4` column (the date part of `date`) is added to Data. Days
    without records between the first and last date are added as empty
    rows before plotting. The image is written to pdf/iteration/.
    Returns None.
    """
    print('======= Creating frequency_TL =======')
    # Filtering: rows per day, keeping the day and the first count column
    Data['date_4'] = Data['date'].dt.date
    per_day = Data.groupby("date_4", sort=False, as_index=False).count()
    per_day = per_day.iloc[:, 0:2].rename(columns={"Unnamed: 0": "n"})

    first_day = min(per_day["date_4"])   # start date
    last_day = max(per_day["date_4"])    # end date
    span = last_day - first_day          # as timedelta

    from datetime import timedelta
    # Setting data with missing times: every day from first to last
    all_days = [first_day + timedelta(days=offset)
                for offset in range(span.days + 1)]
    calendar = pd.DataFrame(all_days)
    calendar.columns = ['date_4']
    data_with_missing_times = pd.merge(calendar, per_day, on='date_4', how='outer')

    # Wider tick spacing for longer timelines
    if span.days > 1825:
        datebreaks = '18 months'
    elif span.days > 1095:
        datebreaks = '12 months'
    else:
        datebreaks = '6 months'

    # Creating and saving TL_4
    big_text = p9.theme(axis_text=p9.element_text(size=40),
                        axis_title=p9.element_text(size=40, face='bold'))
    plot = p9.ggplot(data=data_with_missing_times,
                     mapping=p9.aes(x='date_4', y='n'))
    plot = plot + p9.geom_col(fill='red')
    plot = plot + p9.theme_classic()
    plot = plot + big_text
    plot = plot + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
    plot = plot + p9.labs(x='', y='')

    if len(data_with_missing_times) > 0:
        plot.save(filename='TL_4.jpeg',
                  plot=plot,
                  path="pdf/iteration/",
                  width=25, height=5,
                  dpi=320)
    else:
        print('Plot not created; no data found.')

    print('=================================frequency_TL DONE =============================')
    return None
Пример #13
0
def make_single_bar_chart_multi_year(survey_data, column, facet, proportionally=False):
    """Make a barchart showing the number of respondents responding to a single column.
        Bars are colored by which year of the survey they correspond to. If facet
        is not empty, the resulting plot will be faceted into subplots by the variables
        given.

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey
        column (str): Column to plot responses to
        facet (list,optional): List of columns use for grouping
        proportionally (bool, optional): Defaults to False. If True,
            the bars heights are determined proportionally to the
            total number of responses in that facet.

    Returns:
        (plotnine.ggplot): Plot object which can be displayed in a notebook or saved out to a file

    """
    # NOTE(review): `facet` is used here as a single column label, but the
    # `facet + ["year"]` expressions below only work when it is a list, and
    # `topic_data_long[facet].unique()` further down needs a single column.
    # Confirm which type callers actually pass.
    cols = [column, facet]
    show_legend = False
    topic_data = survey_data[cols + ["year"]]

    # make_long is defined elsewhere; the code below reads the columns
    # "rating" and "level_1" from its result.
    topic_data_long = make_long(topic_data, facet, multi_year=True)

    if proportionally:
        # Sum over the rows with rating == 1 divided by the sum over all
        # rows, per (facet, year) group
        proportions = (
            topic_data_long[topic_data_long.rating == 1].groupby(facet + ["year"]).sum()
            / topic_data_long.groupby(facet + ["year"]).sum()
        ).reset_index()
    else:
        # Number of rows with rating == 1 per (facet, year) group
        proportions = (
            topic_data_long[topic_data_long.rating == 1]
            .groupby(facet + ["year"])
            .count()
            .reset_index()
        )

    # NOTE(review): `x` is not used by the plot below; the remove() call
    # still raises ValueError when no "level_1" column exists.
    x = topic_data_long.columns.tolist()
    x.remove("level_1")

    ## Uncomment to return dataframe instead of plot
    # return proportions

    return (
        p9.ggplot(proportions, p9.aes(x=facet, fill="year", y="level_1"))
        + p9.geom_bar(show_legend=show_legend, stat="identity")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        # x positions follow the order of first appearance in the long
        # data; underscores in the labels are shown as spaces
        + p9.scale_x_discrete(
            limits=topic_data_long[facet].unique().tolist(),
            labels=[
                x.replace("_", " ") for x in topic_data_long[facet].unique().tolist()
            ],
        )
    )
Пример #14
0
def plot_violin_plots(
    par_id: str,
    dims: List[str],
    draws: Dict,
    log_scale_variables: List[str],
    units: Dict[str, str],
    confidence_intervals,
    measurements,
):
    """Build violin plots of the draws of one parameter.

    :param par_id: Name of the parameter plotted (also the y column of draws)
    :param dims: Dimensions of the parameter; the first one is the x axis
    when there is at most one dimension, otherwise "experiments" is the
    x axis and the second dimension defines the facets
    :param draws: Data passed to ggplot, containing the samples
    :param log_scale_variables: Parameters drawn on a log10 y axis
    :param units: Dictionary of units for each parameter
    :param confidence_intervals: Mapping from parameter name to data with
    lower_ci / upper_ci columns, drawn as error bars when present
    :param measurements: Mapping from parameter name to data with a
    measurement column, drawn as points when present and non-empty
    :return: The plot object
    """
    unit_label = units[par_id]
    axis_column = dims[0] if len(dims) <= 1 else "experiments"

    violins = p9.geom_violin(
        p9.aes(y=f"{par_id}", x=axis_column, fill=axis_column),
        position="identity",
        color="None",
        size=0.5,
        alpha=0.7,
        weight=0.7,
        linetype="None",
    )
    plot = p9.ggplot(data=draws) + violins + p9.labels.ylab(f"{par_id} {unit_label}")

    if par_id in confidence_intervals.keys():
        plot += p9.geoms.geom_errorbar(
            p9.aes(x=axis_column, ymin="lower_ci", ymax="upper_ci"),
            data=confidence_intervals[par_id],
            width=0.1,
        )

    if par_id in measurements.keys() and len(measurements[par_id]) > 0:
        plot += p9.geoms.geom_point(
            p9.aes(y="measurement", x=axis_column),
            data=measurements[par_id],
        )

    n_dims = len(dims)
    if n_dims == 1:
        plot += p9.themes.theme(axis_text_x=p9.element_text(angle=70), )
    elif n_dims > 1:
        # One facet per value of the second dimension; x tick text and the
        # x axis title are hidden
        facet_theme = p9.themes.theme(
            panel_spacing_y=0.05,
            panel_spacing_x=0.35,
            axis_title=p9.element_text(size=10),
            axis_text=p9.element_text(size=11),
            axis_text_y=p9.element_text(size=8, angle=45),
            axis_title_x=p9.element_blank(),
            axis_text_x=p9.element_blank(),
        )
        plot += p9.facet_wrap(f"~{dims[1]}") + facet_theme

    if par_id in log_scale_variables:
        plot += p9.scale_y_log10()

    return plot
Пример #15
0
def plot_market_wide_sector_performance(all_stocks_cip: pd.DataFrame):
    """
    Display specified dates for average sector performance. Each company is assumed to have at zero
    at the start of the observation period. A plot as base64 data is returned.

    Arguments:
        all_stocks_cip - dataframe indexed by stock code with one column per
                         date; it is merged with stocks_by_sector() on
                         "asx_code"
    Returns:
        whatever plot_as_inline_html_data() produces for the faceted plot
        (one panel per sector, three panels per row)
    """
    n_stocks = len(all_stocks_cip)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector["sector_name"].unique())
    print("Found {} unique sectors".format(n_unique_sectors))

    df = all_stocks_cip.merge(code_and_sector, left_index=True, right_on="asx_code")
    print(
        "Found {} stocks, {} sectors and merged total: {}".format(
            n_stocks, len(code_and_sector), len(df)
        )
    )
    # compute average change in percent of each unique sector over each day and sum over the dates
    cumulative_pct_change = df.expanding(axis="columns").sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    grouped_df = df.groupby("sector_name").mean()

    # ready the dataframe for plotting
    grouped_df = pd.melt(
        grouped_df,
        ignore_index=False,
        var_name="date",
        value_name="cumulative_change_percent",
    )
    grouped_df["sector"] = grouped_df.index
    grouped_df["date"] = pd.to_datetime(grouped_df["date"])
    n_col = 3
    # Ceiling division: `n // n_col + 1` reserved a whole empty row of
    # panels whenever the sector count was an exact multiple of n_col.
    n_row = max(1, -(-n_unique_sectors // n_col))
    plot = (
        p9.ggplot(
            grouped_df, p9.aes("date", "cumulative_change_percent", color="sector")
        )
        + p9.geom_line(size=1.0)
        + p9.facet_wrap("~sector", nrow=n_row, ncol=n_col, scales="free_y")
        + p9.xlab("")
        + p9.ylab("Average sector change (%)")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=6),
            axis_text_y=p9.element_text(size=6),
            figure_size=(12, 6),
            panel_spacing=0.3,
            legend_position="none",
        )
    )
    return plot_as_inline_html_data(plot)
Пример #16
0
def intensity_TL(Data):
    """Plot the mean symptom intensity per day as red points and save TL_3.jpeg.

    Uses the rows of Data with Group == "sy", averaged per Date. Days
    without a record between the first and last date are added as empty
    rows. The plot is only produced when the resulting timeline has more
    than five rows; it is written to pdf/iteration/.

    Arguments:
        Data - dataframe with at least the columns Group, Date and Intensity

    Returns None. When every Intensity value is missing, only a warning is
    printed.
    """
    print('======= Creating intensity_TL =======')

    # Stop here when there is nothing to plot; falling through used to hit
    # variables that are only defined further down.
    if Data.Intensity.isna().all():
        print("WARNING: All values for Intensity are NA's")
        return None

    # Filter Symptomes and average the intensity per date. A plain mean()
    # replaces the dict-style agg on a single column, which current pandas
    # rejects as a nested renamer.
    Symptomes = Data[Data.Group == "sy"]
    tl3 = Symptomes.groupby("Date", as_index=False, sort=False)['Intensity'].mean()
    tl3['Date'] = pd.to_datetime(tl3['Date'])

    if tl3.empty:
        print('Plot not created; no data found.')
        print('=================================intensity_TL DONE =============================')
        return None

    # Setting data with missing times: one row per day in the range
    sdate = tl3["Date"].min()   # start date
    edate = tl3["Date"].max()   # end date
    delta = edate - sdate       # as timedelta
    DF = pd.DataFrame({
        'Date': pd.date_range(start=sdate, periods=delta.days + 1, freq='D')
    })
    data_with_missing_times = pd.merge(DF, tl3, on='Date', how='outer')

    # Creating and saving TL_3
    if len(data_with_missing_times) > 5:
        # Wider tick spacing for longer timelines
        if delta.days > 1825:
            datebreaks = '18 months'
        elif delta.days > 1095:
            datebreaks = '12 months'
        else:
            datebreaks = '6 months'

        plot = (
            p9.ggplot(data=data_with_missing_times,
                      mapping=p9.aes(x='Date', y='Intensity'))
            + p9.geom_point(color='red', size=5)
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=40),
                       axis_title=p9.element_text(size=40, face='bold'))
            + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
            + p9.labs(x='', y='')
        )
        plot.save(filename='TL_3.jpeg',
                  path="pdf/iteration/",
                  width=25, height=5,
                  dpi=320)
    else:
        print('Plot not created; no data found.')

    print('=================================intensity_TL DONE =============================')
    return None
Пример #17
0
def plot_paired_ranking(
    method1_summary_df,
    method2_summary_df,
    method1_name,
    method2_name,
    output_figure_filename,
):
    """Scatter the simulated percentile of one method against another and save it as SVG.

    The two summary dataframes are joined on their index, their
    "Percentile (simulated)" columns are plotted against each other, and
    the figure is written to `output_figure_filename` and then printed.
    """
    suffix1 = f"_{method1_name}"
    suffix2 = f"_{method2_name}"

    # Join dataframes to make sure the rows are aligned
    merged_summary_df = method1_summary_df.merge(
        method2_summary_df,
        left_index=True,
        right_index=True,
        suffixes=[suffix1, suffix2],
    )

    # Format input dataframe
    ranking1 = merged_summary_df[f"Percentile (simulated)_{method1_name}"].values
    ranking2 = merged_summary_df[f"Percentile (simulated)_{method2_name}"].values
    plot_df = pd.DataFrame(
        data={"Method1 ranking": ranking1, "Method2 ranking": ranking2},
        index=merged_summary_df.index,
    )

    sans = "sans-serif"
    fig = pn.ggplot(plot_df, pn.aes(x="Method1 ranking", y="Method2 ranking"))
    fig += pn.geom_point()
    fig += pn.labs(
        x=f"{method1_name} pathway ranking",
        y=f"{method2_name} pathway ranking",
        title=f"{method1_name} vs {method2_name} pathway ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family=sans, size=15),
        legend_text=pn.element_text(family=sans, size=12),
        plot_title=pn.element_text(family=sans, size=15),
        axis_text=pn.element_text(family=sans, size=12),
        axis_title=pn.element_text(family=sans, size=15),
    )

    # Save figure
    save_options = dict(
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
    fig.save(output_figure_filename, **save_options)
    print(fig)
def plot_metrics_comparison_lineplot_grid(dataframe,
                                          models_labels,
                                          metrics_labels,
                                          figure_size=(14, 4)):
    """
    Build a grid of line plots (rows: iterations, columns: model) showing
    `value` against `threshold` for each `variable`.

    `models_labels` and `metrics_labels` map the raw model and metric keys
    to the text shown in the column strips and in the legend.
    """

    def two_decimals(breaks):
        # Tick labels with 2 decimal digits.
        return ['{:.2f}'.format(tick) for tick in breaks]

    def metric_names(keys):
        # Legend entries use the display names of the metrics.
        return [metrics_labels[key] for key in keys]

    def row_strip(value):
        return f'iters = {value}'

    def column_strip(value):
        return f'{models_labels[value]}'

    mapping = p9.aes(x='threshold',
                     y='value',
                     group='variable',
                     color='variable',
                     shape='variable')

    grid_theme = p9.theme(
        # Remove the y axis name.
        axis_title_y=p9.element_blank(),
        # Set the size of x and y tick labels font.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Place the legend on top, without title, and reduce the margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        # Set the size for the figure.
        figure_size=figure_size,
    )

    grid = p9.ggplot(dataframe, mapping)
    # Points joined by lines.
    grid = grid + p9.geom_point() + p9.geom_line()
    # Axis names and a little padding around the data.
    grid = grid + p9.scale_x_discrete(name='Threshold', expand=(0, 0.2))
    grid = grid + p9.scale_y_continuous(name='Value',
                                        expand=(0, 0.05),
                                        labels=two_decimals)
    # Legend: renamed metrics and the qualitative Set2 brewer palette.
    grid = grid + p9.scale_shape_discrete(name='Metric', labels=metric_names)
    grid = grid + p9.scale_color_brewer(name='Metric',
                                        labels=metric_names,
                                        type='qual',
                                        palette='Set2')
    # One panel per (iterations, model) pair with renamed strip labels.
    grid = grid + p9.facet_grid('iterations ~ model',
                                labeller=p9.labeller(rows=row_strip,
                                                     cols=column_strip))
    grid = grid + grid_theme
    return grid
Пример #19
0
 def __init__(self, base_size=11, base_family='DejaVu Sans'):
     """Start from theme_light and restyle the ticks, panel border and facet strips."""
     theme_light.__init__(self, base_size, base_family)
     # Grey outline shared by the panel border and the strip background
     border = element_rect(fill='None', color='#838383', size=1)
     strip_box = element_rect(fill='#DDDDDD', color='#838383', size=1)
     overrides = theme(
         axis_ticks=element_line(color='#DDDDDD', size=0.5),
         panel_border=border,
         strip_background=strip_box,
         strip_text_x=element_text(color='black'),
         strip_text_y=element_text(color='black', angle=-90),
     )
     self.add_theme(overrides, inplace=True)
 def create(self, file_path: str) -> None:
     """Draw the design pattern counts as a labelled bar chart and save it to file_path."""
     data = self._data
     layers = [
         geom_bar(stat="identity", fill="#1e4f79"),
         # The "fraction" label is printed as a percentage above each bar
         geom_text(va='bottom', size=24, format_string='{:.1%}'),
         # Keep the bars in the order of the rows of the data
         scale_x_discrete(limits=data["pattern"]),
         scale_y_continuous(labels=comma_format(), expand=[0.1, 0]),
         ggtitle("Design Pattern Counts"),
         xlab("Design Pattern"),
         ylab("Count"),
         theme_classic(base_size=32, base_family="Helvetica"),
         theme(text=element_text(size=32),
               axis_text_x=element_text(rotation=45, ha="right")),
     ]
     chart = ggplot(data, aes(x="pattern", y="count", label="fraction"))
     for layer in layers:
         chart = chart + layer
     chart.save(file_path, width=24, height=8)
Пример #21
0
 def __init__(self, base_size=11, base_family='DejaVu Sans'):
     """Start from theme_light; restyle ticks, panel border and strips, and blank the legend key."""
     theme_light.__init__(self, base_size, base_family)
     overrides = dict(
         axis_ticks=element_line(color='#DDDDDD', size=0.5),
         panel_border=element_rect(fill='None', color='#838383', size=1),
         strip_background=element_rect(
             fill='#DDDDDD', color='#838383', size=1),
         strip_text_x=element_text(color='black'),
         strip_text_y=element_text(color='black', angle=-90),
         legend_key=element_blank(),
     )
     self.add_theme(theme(**overrides), inplace=True)
 def create(self, file_path: str) -> None:
     """Draw the number of classes per category as a labelled bar chart and save it to file_path."""
     data = self._data
     bars = geom_bar(stat="identity", fill="#1e4f79")
     # The "percent" column is printed above each bar
     bar_labels = geom_text(va='bottom', size=24)
     # Keep the bars in the order of the rows of the data
     x_scale = scale_x_discrete(limits=data["category"])
     y_scale = scale_y_continuous(labels=comma_format(), expand=[0.1, 0])
     text_theme = theme(text=element_text(size=32),
                        axis_text_x=element_text(rotation=45, ha="right"))

     chart = ggplot(data, aes(x="category", y="count", label="percent"))
     chart = chart + bars + bar_labels + x_scale + y_scale
     chart = chart + ggtitle("Classes per Category") + xlab("Category")
     chart = chart + ylab("Number of Classes")
     chart = chart + theme_classic(base_size=32, base_family="Helvetica")
     chart = chart + text_theme
     chart.save(file_path, width=7, height=7)
Пример #23
0
Файл: plot.py Проект: NPSDC/qb
 def __init__(self, base_size=11, base_family="DejaVu Sans"):
     """Start from theme_light; restyle ticks, panel border and strips, and blank the legend key."""
     theme_light.__init__(self, base_size, base_family)
     tick_style = element_line(color="#DDDDDD", size=0.5)
     border_style = element_rect(fill="None", color="#838383", size=1)
     strip_style = element_rect(fill="#DDDDDD", color="#838383", size=1)
     customisation = theme(
         axis_ticks=tick_style,
         panel_border=border_style,
         strip_background=strip_style,
         strip_text_x=element_text(color="black"),
         strip_text_y=element_text(color="black", angle=-90),
         legend_key=element_blank(),
     )
     self.add_theme(customisation, inplace=True)
Пример #24
0
def plot_boxplot_series(df, normalisation_method=None):
    """
    Treating each column as a separate boxplot and each row as an independent
    observation (ie. different company), render a series of box plots to
    identify a shift in performance from the observations.

    Arguments:
        df - dataframe of observations: one row per observation, one column
             per boxplot. NOTE(review): the plot maps x to "fetch_date", so
             the column index is presumably named "fetch_date" - confirm
             against the caller.
        normalisation_method - should be one of the values present in
             SectorSentimentSearchForm.normalisation_choices. None or "1"
             plots df unchanged, "2" applies per-column min/max scaling and
             any other value divides each column by its maximum.

    Returns:
        A tuple of (result of plot_as_inline_html_data() for the plot,
        list of (row label, n_wins, row sum) tuples ordered by descending
        row sum).
    """
    # compute star performers: those who are above the mean on a given day counted over all days
    # The column means are loop-invariant, so they are computed once up front
    # rather than once per column.
    column_means = df.mean(axis=0)
    count = defaultdict(int)
    for col in df.columns:
        above_mean = df[col] > column_means[col]
        for winner in df.index[above_mean]:
            count[winner] += 1

    winner_results = []
    for asx_code, n_wins in count.items():
        x = df.loc[asx_code].sum()
        # avoid "dead cat bounce" stocks which fall spectacularly and then post major increases in percentage terms
        if x > 0.0:
            winner_results.append((asx_code, n_wins, x))

    # and plot the normalised data
    if normalisation_method is None or normalisation_method == "1":
        normalized_df = df
        y_label = "Percentage change"
    elif normalisation_method == "2":
        normalized_df = (df - df.min()) / (df.max() - df.min())
        y_label = "Percentage change (min/max. scaled)"
    else:
        normalized_df = df / df.max(axis=0)  # div by max if all else fails...
        y_label = "Percentage change (normalised by dividing by max)"

    # the figure height grows with the number of boxplots (columns)
    n_inches = len(df.columns) / 5
    melted = normalized_df.melt(ignore_index=False).dropna()
    plot = (
        p9.ggplot(melted, p9.aes(x="fetch_date", y="value"))
        + p9.geom_boxplot(outlier_colour="blue")
        + p9.theme(
            axis_text_x=p9.element_text(size=7),
            axis_text_y=p9.element_text(size=7),
            figure_size=(12, n_inches),
        )
        + p9.labs(x="Date (YYYY-MM-DD)", y=y_label)
        + p9.coord_flip()
    )
    # Sort ascending and then reverse (rather than reverse=True) so that rows
    # with equal sums keep exactly the order they have always been given.
    ranked = sorted(winner_results, key=lambda t: t[2])[::-1]
    return (
        plot_as_inline_html_data(plot),
        ranked,
    )
Пример #25
0
def plot_market_wide_sector_performance(all_dates,
                                        field_name='change_in_percent'):
    """
    Display specified dates for average sector performance. Each company is
    assumed to start at zero at the beginning of the observation period.

    Arguments:
        all_dates - the dates to report on, passed to company_prices()
        field_name - the field requested from company_prices(); it is
                     accumulated across the dates and averaged per sector

    Returns:
        The result of plot_as_inline_html_data() for the plot (a plot as
        base64 data).
    """
    # Honour the caller's field_name (previously the default was hard-coded
    # here and the parameter silently ignored).
    df = company_prices(None, all_dates=all_dates,
                        fields=field_name)  # None == all stocks
    n_stocks = len(df)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector['sector_name'].unique())
    print("Found {} unique sectors".format(n_unique_sectors))

    df = df.merge(code_and_sector, left_on='asx_code', right_on='asx_code')
    print("Found {} stocks, {} sectors and merged total: {}".format(
        n_stocks, len(code_and_sector), len(df)))
    # compute average change in percent of each unique sector over each day and sum over the dates
    cumulative_pct_change = df.expanding(axis='columns').sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    grouped_df = df.groupby('sector_name').mean()

    # ready the dataframe for plotting
    grouped_df = pd.melt(grouped_df,
                         ignore_index=False,
                         var_name='date',
                         value_name='cumulative_change_percent')
    grouped_df['sector'] = grouped_df.index
    grouped_df['date'] = pd.to_datetime(grouped_df['date'])
    n_col = 3
    # Ceiling division: exactly enough rows for every sector, without the
    # spare empty row that "// n_col + 1" produced for multiples of n_col.
    n_row = max(1, -(-n_unique_sectors // n_col))
    plot = (p9.ggplot(
        grouped_df, p9.aes('date', 'cumulative_change_percent',
                           color='sector')) + p9.geom_line(size=1.0) +
            p9.facet_wrap('~sector',
                          nrow=n_row,
                          ncol=n_col,
                          scales='free_y') + p9.xlab('') +
            p9.ylab('Average sector change (%)') +
            p9.theme(axis_text_x=p9.element_text(angle=30, size=6),
                     axis_text_y=p9.element_text(size=6),
                     figure_size=(12, 6),
                     panel_spacing=0.3,
                     legend_position='none'))
    return plot_as_inline_html_data(plot)
 def create(self, file_path: str) -> None:
     """
     Draw the "Intensity of Design Pattern Use" histograms and save them.
     Arguments:
         file_path - path handed to the plot's save() call
     """
     # spacing between the facets of the grid
     spacing = {"wspace": 0.3, "hspace": 0.5}
     look = theme(text=element_text(size=32),
                  axis_title_y=element_text(margin={"r": 40}),
                  subplots_adjust=spacing)

     figure = ggplot(self._data, aes("value"))
     figure = figure + geom_histogram(bins=100, fill="#1e4f79")
     # one histogram per value of "variable", three per row
     figure = figure + facet_wrap(facets="variable", scales="free", ncol=3)
     figure = figure + xlim(0, 1)
     figure = figure + scale_y_continuous(labels=comma_format())
     figure = figure + ggtitle("Intensity of Design Pattern Use")
     figure = figure + xlab(
         "Percentage of Classes Participating in Design Pattern")
     figure = figure + ylab("Number of Projects")
     figure = figure + theme_classic(base_size=32, base_family="Helvetica")
     figure = figure + look
     figure.save(file_path, width=24, height=24)
def plot_preprocessing_boxplot_bymodel(dataframe,
                                       models_labels,
                                       metrics_labels,
                                       groups_labels,
                                       figure_size=(14, 4)):
    """
    Build the grid of boxplots: one facet row per model, one box per
    metric and group.
    Arguments:
        dataframe - data with the 'variable', 'value', 'group' and 'model'
                    columns used by the plot
        models_labels - lookup from a 'model' value to its facet label
        metrics_labels - lookup from a 'variable' value to its x tick label
        groups_labels - lookup from a 'group' value to its legend label
        figure_size - size of the figure, passed to the theme
    """

    def metric_names(keys):
        return [metrics_labels[key] for key in keys]

    def two_decimals(values):
        return ['{:.2f}'.format(value) for value in values]

    def group_names(keys):
        return [groups_labels[key] for key in keys]

    def model_name(key):
        return f'{models_labels[key]}'

    canvas = p9.ggplot(dataframe,
                       p9.aes(x='variable', y='value', fill='group'))
    boxes = p9.geom_boxplot(position='dodge')

    # Axis scales: the x ticks are renamed, the y ticks show two decimals.
    x_scale = p9.scale_x_discrete(name='Metric', labels=metric_names)
    y_scale = p9.scale_y_continuous(name='Value',
                                    expand=(0, 0.05),
                                    labels=two_decimals)

    # Qualitative 'Set2' palette for the groups (chosen for color-blind
    # readers).
    fill_scale = p9.scale_fill_brewer(name='Group',
                                      labels=group_names,
                                      type='qual',
                                      palette='Set2')

    # One row per model, each with its own y range and a renamed label.
    grid = p9.facet_grid('model ~ .',
                         scales='free_y',
                         labeller=p9.labeller(rows=model_name))

    look = p9.theme(
        # No axis titles.
        axis_title_x=p9.element_blank(),
        axis_title_y=p9.element_blank(),
        # Font size of the tick labels.
        axis_text_x=p9.element_text(size=7),
        axis_text_y=p9.element_text(size=7),
        # Legend on top, untitled, with a reduced margin.
        legend_title=p9.element_blank(),
        legend_position='top',
        legend_box_margin=2,
        figure_size=figure_size,
    )

    return canvas + boxes + x_scale + y_scale + fill_scale + grid + look
Пример #28
0
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
  """Output bar plot of bsuite data.

  Args:
    df_in: raw data; it is passed through _clean_bar_plot_data before plotting.
    sweep_vars: optional column names; when given the plot is faceted by them
      (one facet per row) and the figure height grows with the number of
      distinct combinations.

  Returns:
    The assembled bar plot.
  """
  df = _clean_bar_plot_data(df_in, sweep_vars)

  base_parts = (
      gg.aes(x='env', y='score', colour='type', fill='type'),
      gg.geom_bar(position='dodge', stat='identity'),
      gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5),
      gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS),
      gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS),
      gg.xlab('experiment'),
      gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1)),
  )
  p = gg.ggplot(df)
  for part in base_parts:
    p = p + part

  # Unfinished jobs are drawn with a layer of alpha.
  every_job_finished = all(df.finished)
  if not every_job_finished:
    p = p + gg.aes(alpha='finished')
    p = p + gg.scale_alpha_discrete(range=[0.3, 1.0])

  # One facet (and one slice of figure height) per sweep combination.
  if not sweep_vars:
    n_hypers = 1
  else:
    p = p + gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
    n_hypers = df[sweep_vars].drop_duplicates().shape[0]

  figure_height = 3 * n_hypers + 1
  return p + gg.theme(figure_size=(14, figure_height))
Пример #29
0
def theme_cognoma(fontsize_mult=1):
    """
    Build the theme: gg.theme_bw plus colour and font-size overrides.
    Arguments:
        fontsize_mult - multiplier applied to the base (14), axis text (12)
                        and axis title (13) font sizes
    """
    charcoal = "#4d4d4d"

    def charcoal_text(points):
        # A fresh text element in the shared dark grey, scaled by the
        # multiplier.
        return gg.element_text(size=points * fontsize_mult, color=charcoal)

    overrides = gg.theme(
        line=gg.element_line(color=charcoal),
        rect=gg.element_rect(fill="white", color=None),
        text=gg.element_text(color="black"),
        axis_ticks=gg.element_line(color=charcoal),
        legend_key=gg.element_rect(color=None),
        panel_border=gg.element_rect(color=charcoal),
        panel_grid=gg.element_line(color="#b3b3b3"),
        panel_grid_major_x=gg.element_blank(),
        panel_grid_minor=gg.element_blank(),
        strip_background=gg.element_rect(fill="#FEF2E2", color=charcoal),
        axis_text=charcoal_text(12),
        axis_title_x=charcoal_text(13),
        axis_title_y=charcoal_text(13),
    )
    base = gg.theme_bw(base_size=14 * fontsize_mult)
    return base + overrides
Пример #30
0
    def plot_zmw_stats(self, **kwargs):
        """Stacked bar graph of the ZMW stats, one bar per run.

        Note
        ----
        Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

        Parameters
        ----------
        ``**kwargs`` : dict
            Keyword arguments forwarded to :meth:`Summaries.zmw_stats`.

        Returns
        -------
        plotnine.ggplot.ggplot
            Bars stacked by status; the figure width scales with the
            number of distinct names.

        """
        stats = self.zmw_stats(**kwargs)
        n_names = len(stats['name'].unique())
        n_statuses = len(stats['status'].unique())

        bars = p9.geom_col(position=p9.position_stack(reverse=True),
                           width=0.8)
        x_tick_text = p9.element_text(angle=90, vjust=1, hjust=0.5)
        look = p9.theme(axis_text_x=x_tick_text,
                        figure_size=(0.4 * n_names, 2.5))

        plot = (p9.ggplot(stats,
                          p9.aes(x='name', y='number', fill='status')) +
                bars +
                look +
                p9.ylab('number of ZMWs') +
                p9.xlab(''))

        # Only switch to the CBPALETTE fill colours when there are fewer
        # statuses than palette entries.
        if n_statuses >= len(CBPALETTE):
            return plot
        return plot + p9.scale_fill_manual(CBPALETTE[1:])
Пример #31
0
def plot_categ_spatial(mod,
                       adata,
                       sample_col,
                       color,
                       n_columns=2,
                       figure_size=(24, 5.7),
                       point_size=0.8,
                       text_size=9):
    """
    Scatter plot of the observations at their image coordinates, coloured by
    a categorical value and faceted by sample.
    Arguments:
        mod - not used by this function
        adata - object whose .obs dataframe holds the "imagecol" and
                "imagerow" columns and the sample_col column
        sample_col - name of the adata.obs column that identifies the sample
        color - values assigned as the "color" column (one per row of
                adata.obs); converted to an ordered categorical
        n_columns - number of facet columns
        figure_size - size of the figure, passed to the theme
        point_size - size of the points
        text_size - font size of the facet strip text
    Returns:
        The assembled plotnine plot.
    """
    # Work on an explicit copy: the columns added below must not be written
    # through a slice of adata.obs (chained assignment / SettingWithCopy).
    for_plot = adata.obs[["imagecol", "imagerow", sample_col]].copy()
    for_plot["color"] = color

    # fix types
    for_plot["color"] = pd.Categorical(for_plot["color"], ordered=True)
    for_plot["sample"] = pd.Categorical(for_plot[sample_col], ordered=False)
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    # the row coordinate is negated before it is plotted on the y axis
    for_plot["imagerow"] = -pd.to_numeric(for_plot["imagerow"])

    ax = (
        plotnine.ggplot(
            for_plot, plotnine.aes(x="imagecol", y="imagerow", color="color"))
        + plotnine.geom_point(size=point_size)
        + plotnine.coord_fixed() + plotnine.theme_bw() + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        ) + plotnine.facet_wrap("~sample", ncol=n_columns) +
        plotnine.theme(figure_size=figure_size))

    return ax
Пример #32
0
def theme_cognoma(fontsize_mult=1):
    """
    Build the theme: plotnine's theme_bw plus colour and font-size overrides.
    Arguments:
        fontsize_mult - multiplier applied to the base (14), axis text (12)
                        and axis title (13) font sizes
    """
    import plotnine as gg

    dark = "#4d4d4d"
    text_size = 12 * fontsize_mult
    title_size = 13 * fontsize_mult

    settings = {
        "line": gg.element_line(color=dark),
        "rect": gg.element_rect(fill="white", color=None),
        "text": gg.element_text(color="black"),
        "axis_ticks": gg.element_line(color=dark),
        "legend_key": gg.element_rect(color=None),
        "panel_border": gg.element_rect(color=dark),
        "panel_grid": gg.element_line(color="#b3b3b3"),
        "panel_grid_major_x": gg.element_blank(),
        "panel_grid_minor": gg.element_blank(),
        "strip_background": gg.element_rect(fill="#FEF2E2", color=dark),
        "axis_text": gg.element_text(size=text_size, color=dark),
        "axis_title_x": gg.element_text(size=title_size, color=dark),
        "axis_title_y": gg.element_text(size=title_size, color=dark),
    }
    return gg.theme_bw(base_size=14 * fontsize_mult) + gg.theme(**settings)
Пример #33
0
    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Plot accuracy against the fraction of the question revealed.

        Arguments:
            expo - when False, return a moving-average plot of
                   self.char_plot_df coloured by Guessing_Model; when True,
                   build the plot faceted by Guessing_Model and coloured by
                   Dataset, with human gameplay curves added when
                   'data/external/all_human_gameplay.json' exists and
                   self.no_humans is not set
            no_models - (expo only) plot the human gameplay data alone
            columns - (expo only) lay the facets out in a single column
                      instead of a single row

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError - if no_models is requested but the human gameplay
                         data is not available
        """
        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # This branch plots self.char_plot_df; the former reference to
                # a local "df" was never bound here and raised a NameError.
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        def accuracy_curve(correct_positions, wrong_positions, dataset, name):
            # Cumulative share of correct answers, ordered by answer position.
            positions = np.array(correct_positions + wrong_positions)
            outcomes = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_outcomes = outcomes[order]
            curve = pd.DataFrame({
                'correct': sorted_outcomes.cumsum() / sorted_outcomes.shape[0],
                'char_percent': positions[order],
            })
            curve['Dataset'] = dataset
            curve['Guessing_Model'] = f' {name}'
            return curve

        # Decide once whether human data is used, so that loading it and
        # drawing it can never disagree.
        use_humans = os.path.exists('data/external/all_human_gameplay.json') and not self.no_humans
        human_df = None
        if use_humans:
            with open('data/external/all_human_gameplay.json') as f:
                all_gameplay = json.load(f)
            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]
                # the 'live' event contributes no 'Regular Test' curve
                if event != 'live':
                    frames.append(accuracy_curve(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test', name))

                frames.append(accuracy_curve(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial', name))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(accuracy_curve(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial', name))

            human_df = pd.concat(frames)
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
            dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    'no_models requires human gameplay data, but it is missing or disabled')
            p = ggplot(human_df) + geom_point(shape='.')
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if use_humans:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        # The model curves are either a moving average or binned means; the
        # humans-only plot has no such layer.
        if no_models:
            chart = None
        elif self.mvg_avg_char:
            chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
        else:
            chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p