def plot_pointplot(plot_df, y_axis_label="", use_log10=False, limits=(0, 3.2)):
    """
    Plots the pointplot

    Arguments:
        plot_df - the dataframe that contains the odds ratios and lemmas
        y_axis_label - the label for the y axis
        use_log10 - use log10 for the y axis?
        limits - the y-axis limits used when use_log10 is False
    """
    graph = (
        p9.ggplot(plot_df, p9.aes(x="lemma", y="odds_ratio"))
        + p9.geom_pointrange(
            p9.aes(ymin="lower_odds", ymax="upper_odds"),
            position=p9.position_dodge(width=1),
            size=0.3,
            color="#253494",
        )
        # Order the lemmas by ascending odds ratio
        + p9.scale_x_discrete(
            limits=plot_df.sort_values("odds_ratio", ascending=True).lemma.tolist()
        )
        + (p9.scale_y_log10() if use_log10 else p9.scale_y_continuous(limits=limits))
        + p9.geom_hline(p9.aes(yintercept=1), linetype='--', color='grey')
        + p9.coord_flip()
        + p9.theme_seaborn(context='paper', style="ticks", font_scale=1, font='Arial')
        + p9.theme(
            # 640 x 480
            figure_size=(6.66, 5),
            panel_grid_minor=p9.element_blank(),
            axis_title=p9.element_text(size=12),
            axis_text_x=p9.element_text(size=10),
        )
        + p9.labs(x=None, y=y_axis_label)
    )
    return graph
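# Usage sketch for plot_pointplot (illustrative only -- "demo_df" and its
# values are assumptions, not part of the original code). The function
# expects "lemma", "odds_ratio", "lower_odds" and "upper_odds" columns.
def _demo_plot_pointplot():
    import pandas as pd
    demo_df = pd.DataFrame({
        "lemma": ["cell", "gene", "protein"],
        "odds_ratio": [1.5, 0.8, 2.1],
        "lower_odds": [1.2, 0.6, 1.7],
        "upper_odds": [1.9, 1.1, 2.6],
    })
    graph = plot_pointplot(demo_df, y_axis_label="Odds ratio")
    graph.save("pointplot_demo.png")  # or display `graph` in a notebook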
def __init__(self):
    pn.theme_minimal.__init__(self, base_family='Open Sans')
    self.add_theme(
        pn.theme(
            axis_title=pn.element_text(size=10),
            axis_title_y=pn.element_text(margin={'r': 12}),
            panel_border=pn.element_rect(color='gainsboro', size=1, fill=None),
        ),
        inplace=True,
    )
def plot_fundamentals(df, stock) -> str:
    assert isinstance(df, pd.DataFrame)
    columns_to_report = [
        "pe",
        "eps",
        "annual_dividend_yield",
        "volume",
        "last_price",
        "change_in_percent_cumulative",
        "change_price",
        "market_cap",
        "number_of_shares",
    ]
    colnames = df.columns
    for column in columns_to_report:
        assert column in colnames

    # express dollar volume, market cap and share count in millions
    df["volume"] = df["last_price"] * df["volume"] / 1000000
    df["market_cap"] /= 1000 * 1000
    df["number_of_shares"] /= 1000 * 1000
    df["fetch_date"] = df.index
    plot_df = pd.melt(
        df,
        id_vars="fetch_date",
        value_vars=columns_to_report,
        var_name="indicator",
        value_name="value",
    )
    plot_df["value"] = pd.to_numeric(plot_df["value"])
    plot_df["fetch_date"] = pd.to_datetime(plot_df["fetch_date"])

    plot = (
        p9.ggplot(plot_df, p9.aes("fetch_date", "value", color="indicator"))
        + p9.geom_line(size=1.5, show_legend=False)
        + p9.facet_wrap("~ indicator", nrow=len(columns_to_report),
                        ncol=1, scales="free_y")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=7),
            axis_text_y=p9.element_text(size=7),
            figure_size=(8, len(columns_to_report)),
        )
        # + p9.aes(ymin=0)
        + p9.xlab("")
        + p9.ylab("")
    )
    return plot_as_inline_html_data(plot)
def plot_series(
    df,
    x=None,
    y=None,
    tick_text_size=6,
    line_size=1.5,
    y_axis_label="Point score",
    x_axis_label="",
    color="stock",
    use_smooth_line=False,
):
    assert len(df) > 0
    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None

    args = {'x': x, 'y': y}
    if color:
        args['color'] = color
    plot = (
        p9.ggplot(df, p9.aes(**args))
        + p9.labs(x=x_axis_label, y=y_axis_label)
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=tick_text_size),
            axis_text_y=p9.element_text(size=tick_text_size),
            legend_position="none",
        )
    )
    if use_smooth_line:
        plot += p9.geom_smooth(size=line_size)
    else:
        plot += p9.geom_line(size=line_size)
    return plot_as_inline_html_data(plot)
def plot_bargraph(count_plot_df, plot_df):
    """
    Plots the bargraph

    Arguments:
        count_plot_df - The dataframe that contains lemma counts
        plot_df - the dataframe that contains the odds ratio and lemmas
    """
    graph = (
        p9.ggplot(count_plot_df.astype({"count": int}),
                  p9.aes(x="lemma", y="count"))
        + p9.geom_col(position=p9.position_dodge(width=0.5), fill="#253494")
        + p9.coord_flip()
        + p9.facet_wrap("repository", scales='free_x')
        # Order the lemmas by ascending odds ratio
        + p9.scale_x_discrete(
            limits=plot_df.sort_values("odds_ratio", ascending=True).lemma.tolist()
        )
        + p9.scale_y_continuous(labels=custom_format('{:,.0g}'))
        + p9.labs(x=None)
        + p9.theme_seaborn(context='paper', style="ticks",
                           font="Arial", font_scale=0.95)
        + p9.theme(
            # 640 x 480
            figure_size=(6.66, 5),
            strip_background=p9.element_rect(fill="white"),
            strip_text=p9.element_text(size=12),
            axis_title=p9.element_text(size=12),
            axis_text_x=p9.element_text(size=10),
        )
    )
    return graph
def plot_overlap_duration(self, data, options):
    matches = data["matches"]
    matches = matches.loc[matches.tag_overlap > 0]
    # matches.loc[:, "log_dur"] = log()
    plt = ggplot(data=matches, mapping=aes(x="tag_duration", y="tag_overlap"))
    plt = (
        plt
        + geom_point()
        + xlab("Tag duration")
        + ylab("Proportion tag overlapping with matching event")
        + theme_classic()
        + theme(
            axis_text_x=element_text(angle=90, vjust=1, hjust=1, margin={"r": -30}),
            plot_title=element_text(weight="bold", size=14,
                                    margin={"t": 10, "b": 10}),
            figure_size=(10, 10),
            text=element_text(size=12, weight="bold"),
        )
        + ggtitle(
            (
                "Proportion of tag overlapping with matching event depending on duration "
                + "size for model {}, database {}, class {}\n"
                + "with detector options {}"
            ).format(
                options["scenario_info"]["model"],
                options["scenario_info"]["database"],
                options["scenario_info"]["class"],
                options,
            )
        )
    )
    return plt
def plot_downstream(clwe, table, output, ylim):
    df = pd.read_csv(data_file(table))
    df = df[df.clwe == clwe]
    df = df.assign(
        refine=pd.Categorical(df['refine'],
                              ['Original', '+retrofit', '+synthetic']),
        language=pd.Categorical(df['language'],
                                ['DE', 'ES', 'FR', 'IT', 'JA', 'RU', 'ZH', 'AVG']),
    )
    g = p9.ggplot(df, p9.aes(x='language', y='accuracy', fill='refine'))
    g += p9.geom_bar(position='dodge', stat='identity', width=.8)
    g += p9.coord_cartesian(ylim=ylim)
    g += p9.scale_fill_manual(['#999999', '#EA5F94', '#FFB14E'])
    g += p9.theme_void(base_size=FONT_SIZE, base_family='Arial')
    g += p9.theme(
        plot_background=p9.element_rect(fill='white'),
        panel_grid_major_y=p9.element_line(),
        axis_text_x=p9.element_text(margin={'t': 10}),
        axis_text_y=p9.element_text(margin={'r': 8}),
        legend_position=(.7, .9),
        legend_direction='horizontal',
        legend_title=p9.element_blank(),
        legend_text=p9.element_text(size=FONT_SIZE),
        legend_box_margin=0,
        figure_size=(12, 3),
    )
    g.save(filename=output_file(output))
def plot_predict(forecast):
    p = (
        ggplot(data=forecast, mapping=aes(x='ds', y='y'))
        + geom_point(colour='blue', alpha=0.3, na_rm=True)
        + geom_line(colour='blue', na_rm=True)
        + geom_line(data=forecast, mapping=aes(x='ds', y='yhat'), colour='red')
        + geom_ribbon(data=forecast,
                      mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
                      fill='blue', alpha=0.1)
        # use date_breaks (a breaks-width string) rather than breaks,
        # which expects explicit break values
        + scale_x_datetime(date_breaks='1 days', date_labels='%y-%m-%d %H:%M')
        + xlab('Time')
        + ylab('Pressure')
        + theme_bw()
        + theme(axis_text_x=element_text(angle=45, hjust=1,
                                         face='bold', color='black'),
                axis_text_y=element_text(face='bold', colour='black'))
    )
    # save via the instance method rather than the unbound ggplot.save
    p.save(filename='predict_pressure_chart.png',
           path=os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png'),
           width=8, height=6, units='in', dpi=326, verbose=False)
    return p
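# Usage sketch for plot_predict (illustrative only; the frame below mimics a
# Prophet-style forecast with ds/y/yhat/yhat_lower/yhat_upper columns and is
# an assumption, not original data). The function writes its PNG into a png/
# directory next to this file, so the demo creates it first.
def _demo_plot_predict():
    import numpy as np
    import pandas as pd
    os.makedirs(os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png'),
                exist_ok=True)
    ds = pd.date_range("2021-01-01", periods=48, freq="h")
    yhat = 1000 + 5 * np.sin(np.arange(48) / 6)
    forecast = pd.DataFrame({
        "ds": ds,
        "y": yhat + np.random.normal(0, 1, 48),
        "yhat": yhat,
        "yhat_lower": yhat - 2,
        "yhat_upper": yhat + 2,
    })
    return plot_predict(forecast)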
def medicine(Data):
    print('======= Creating medicine =======')
    try:
        # Filter medicine entries; copy to avoid SettingWithCopy warnings
        medicine = Data[(Data.Group == 'me') | (Data.Group == 'ma')].copy()

        # Setting data with missing times
        medicine['Date'] = pd.to_datetime(medicine['Date'])
        sdate = min(medicine["Date"])  # start date
        edate = max(medicine["Date"])  # end date
        delta = edate - sdate  # as timedelta
        day = []
        for i in range(delta.days + 1):
            d = sdate + timedelta(days=i)
            day.append(d)
        DF = pd.DataFrame(day)
        DF.columns = ['Date']
        data_with_missing_times = pd.merge(DF, medicine, on='Date', how='outer')
        medicine = data_with_missing_times

        # Medicine NA's are currently shown in the plot; to hide them,
        # keep only the rows with a medicine name:
        # medicine = medicine[pd.isna(medicine.Name) == False]

        # Creating and saving Medicine plot
        if len(medicine) > 5:
            # Plot everything but NA's
            f_tl1 = (p9.ggplot(data=medicine, mapping=p9.aes(x='Date', y='Name'))
                     + p9.geom_point(color='red', size=3)
                     + p9.theme_classic()
                     + p9.theme(axis_text=p9.element_text(size=18),
                                axis_title=p9.element_text(size=18, face='bold'))
                     + p9.labs(title='', x='', y=''))

            f_tl1.save(filename='Medicine.jpeg', plot=f_tl1,
                       path="pdf/iteration/", width=25, height=5, dpi=320)
    except Exception:
        print("Medicine graph failed")
    return print('================================= medicine DONE =============================')
def plot_significance_vs_ranking(summary_df, method_name, x_label,
                                 output_figure_filename):
    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Test statistic": summary_df[
                method_stats_dict[method_name] + " (Real)"
            ].values,
            "Percentile rank": summary_df["Rank (simulated)"].rank(pct=True).values,
        },
        index=summary_df.index,
    )

    fig = pn.ggplot(plot_df, pn.aes(x="Test statistic", y="Percentile rank"))
    fig += pn.geom_point()
    # Highlight pathways above the 90th percentile in red
    fig += pn.geom_point(
        plot_df[plot_df["Percentile rank"] > 0.9],
        pn.aes(x="Test statistic", y="Percentile rank"),
        color="red",
    )
    # Label only the highlighted pathways
    fig += pn.geom_text(
        pn.aes(
            label=[
                x if plot_df.loc[x, "Percentile rank"] > 0.9 else ""
                for x in plot_df.index
            ]
        ),
        ha="left",
        va="top",
        size=5,
    )
    fig += pn.labs(
        x=x_label,
        y="Percentile of ranking",
        title=f"{method_name} pathway statistics vs ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )

    print(fig)

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
def duration_TL(Data):
    print('======= Creating duration_TL =======')
    x = Data.Duration[pd.isna(Data.Duration)]
    # allow a small margin of non-missing values before giving up
    if (len(x) + 10) >= len(Data):
        print("WARNING: All values for Duration are NA's")
    else:
        # Filter symptoms and correct durations; copy to avoid
        # SettingWithCopy warnings
        Symptomes = Data[(Data.Group == "sy") & (Data.Duration < 180)].copy()
        Symptomes['Date'] = pd.to_datetime(Symptomes['Date'])
        if len(Symptomes) == 0:
            print('No duration for TL_2')
        else:
            # Setting data with missing times
            sdate = min(Symptomes["Date"])  # start date
            edate = max(Symptomes["Date"])  # end date
            delta = edate - sdate  # as timedelta
            day = []
            for i in range(delta.days + 1):
                d = sdate + timedelta(days=i)
                day.append(d)
            DF = pd.DataFrame(day)
            DF.columns = ['Date']
            data_with_missing_times = pd.merge(DF, Symptomes, on='Date', how='outer')
            data_with_missing_times.Date = pd.to_datetime(data_with_missing_times.Date)

            if delta.days > 1825:
                datebreaks = '18 months'
            elif delta.days > 1095:
                datebreaks = '12 months'
            else:
                datebreaks = '6 months'

            plot = (p9.ggplot(data=data_with_missing_times,
                              mapping=p9.aes(x='Date', y='Duration'))
                    + p9.geom_smooth(color='red', size=5, method="loess", se=False)
                    + p9.theme_classic()
                    + p9.theme(axis_text=p9.element_text(size=33),
                               axis_title=p9.element_text(size=33, face='bold'))
                    + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
                    + p9.labs(x='', y=''))

            if len(data_with_missing_times) > 0:
                plot.save(filename='TL_2.jpeg', plot=plot,
                          path="pdf/iteration/", width=25, height=5, dpi=320)
            else:
                print('Plot not created; no data found.')
    return print('================================= duration_TL DONE =============================')
def frequency_TL(Data):
    from datetime import timedelta
    print('======= Creating frequency_TL =======')
    # Count entries per calendar day
    Data['date_4'] = Data['date'].dt.date
    tl4 = Data.groupby("date_4", sort=False, as_index=False).count()
    tl4 = tl4.iloc[:, 0:2]
    tl4 = tl4.rename(columns={"Unnamed: 0": "n"})

    # Setting data with missing times
    sdate = min(tl4["date_4"])  # start date
    edate = max(tl4["date_4"])  # end date
    delta = edate - sdate  # as timedelta
    day = []
    for i in range(delta.days + 1):
        d = sdate + timedelta(days=i)
        day.append(d)
    DF = pd.DataFrame(day)
    DF.columns = ['date_4']
    data_with_missing_times = pd.merge(DF, tl4, on='date_4', how='outer')

    if delta.days > 1825:
        datebreaks = '18 months'
    elif delta.days > 1095:
        datebreaks = '12 months'
    else:
        datebreaks = '6 months'

    # Creating and saving TL_4
    plot = (p9.ggplot(data=data_with_missing_times,
                      mapping=p9.aes(x='date_4', y='n'))
            + p9.geom_col(fill='red')
            + p9.theme_classic()
            + p9.theme(axis_text=p9.element_text(size=40),
                       axis_title=p9.element_text(size=40, face='bold'))
            + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
            + p9.labs(x='', y=''))

    if len(data_with_missing_times) > 0:
        plot.save(filename='TL_4.jpeg', plot=plot,
                  path="pdf/iteration/", width=25, height=5, dpi=320)
    else:
        print('Plot not created; no data found.')
    return print('================================= frequency_TL DONE =============================')
def make_single_bar_chart_multi_year(survey_data, column, facet, proportionally=False):
    """Make a barchart showing the number of respondents responding to a single column.

    Bars are colored by which year of the survey they correspond to. If facet
    is not empty, the resulting plot will be faceted into subplots by the
    variable given.

    Args:
        survey_data (pandas.DataFrame): Raw data read in from Kubernetes Survey
        column (str): Column to plot responses to
        facet (str, optional): Column to use for grouping
        proportionally (bool, optional): Defaults to False. If True, the bar
            heights are determined proportionally to the total number of
            responses in that facet.

    Returns:
        (plotnine.ggplot): Plot object which can be displayed in a notebook
        or saved out to a file
    """
    cols = [column, facet]
    show_legend = False
    topic_data = survey_data[cols + ["year"]]
    topic_data_long = make_long(topic_data, facet, multi_year=True)
    # facet is a single column name, so group by [facet, "year"]
    if proportionally:
        proportions = (
            topic_data_long[topic_data_long.rating == 1].groupby([facet, "year"]).sum()
            / topic_data_long.groupby([facet, "year"]).sum()
        ).reset_index()
    else:
        proportions = (
            topic_data_long[topic_data_long.rating == 1]
            .groupby([facet, "year"])
            .count()
            .reset_index()
        )

    ## Uncomment to return dataframe instead of plot
    # return proportions
    return (
        p9.ggplot(proportions, p9.aes(x=facet, fill="year", y="level_1"))
        + p9.geom_bar(show_legend=show_legend, stat="identity")
        + p9.theme(
            axis_text_x=p9.element_text(angle=45, ha="right"),
            strip_text_y=p9.element_text(angle=0, ha="left"),
        )
        + p9.scale_x_discrete(
            limits=topic_data_long[facet].unique().tolist(),
            labels=[
                x.replace("_", " ") for x in topic_data_long[facet].unique().tolist()
            ],
        )
    )
def plot_violin_plots(
    par_id: str,
    dims: List[str],
    draws: Dict,
    log_scale_variables: List[str],
    units: Dict[str, str],
    confidence_intervals,
    measurements,
):
    """Plot and save violin plots of parsed distributions.

    :param par_id: Name of the parameter plotted
    :param dims: Dimensions of the parameter
    :param draws: pd.DataFrame of parameter distribution indexed by dimensions
        and contains the population samples
    :param log_scale_variables: Parameters that are log-distributed
    :param units: Dictionary of units for each parameter
    :param confidence_intervals: Dictionary mapping parameter names to
        dataframes with "lower_ci" and "upper_ci" columns
    :param measurements: Dictionary mapping parameter names to dataframes
        with a "measurement" column
    """
    par_units = units[par_id]
    x = fill = dims[0] if len(dims) <= 1 else "experiments"
    plot = (
        p9.ggplot(data=draws)
        + p9.geom_violin(
            p9.aes(y=f"{par_id}", x=x, fill=fill),
            position="identity",
            color="None",
            size=0.5,
            alpha=0.7,
            weight=0.7,
            linetype="None",
        )
        + p9.labels.ylab(f"{par_id} {par_units}")
    )
    if par_id in confidence_intervals.keys():
        plot += p9.geoms.geom_errorbar(
            p9.aes(x=x, ymin="lower_ci", ymax="upper_ci"),
            data=confidence_intervals[par_id],
            width=0.1,
        )
    if par_id in measurements.keys():
        if len(measurements[par_id]) > 0:
            plot += p9.geoms.geom_point(
                p9.aes(y="measurement", x=x),
                data=measurements[par_id],
            )
    if len(dims) == 1:
        plot += p9.themes.theme(axis_text_x=p9.element_text(angle=70))
    if len(dims) > 1:
        plot += p9.facet_wrap(f"~{dims[1]}") + p9.themes.theme(
            panel_spacing_y=0.05,
            panel_spacing_x=0.35,
            axis_title=p9.element_text(size=10),
            axis_text=p9.element_text(size=11),
            axis_text_y=p9.element_text(size=8, angle=45),
            axis_title_x=p9.element_blank(),
            axis_text_x=p9.element_blank(),
        )
    if par_id in log_scale_variables:
        plot += p9.scale_y_log10()

    return plot
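# Usage sketch for plot_violin_plots (illustrative; the parameter name "km",
# its unit, and the draws below are made up, not from the original pipeline).
def _demo_plot_violin_plots():
    import numpy as np
    import pandas as pd
    rng = np.random.default_rng(0)
    draws = pd.DataFrame({
        "km": rng.lognormal(0, 0.5, 300),
        "condition": np.tile(["a", "b", "c"], 100),
    })
    return plot_violin_plots(
        par_id="km",
        dims=["condition"],
        draws=draws,
        log_scale_variables=["km"],
        units={"km": "(mM)"},
        confidence_intervals={},
        measurements={},
    )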
def plot_market_wide_sector_performance(all_stocks_cip: pd.DataFrame):
    """
    Display specified dates for average sector performance. Each company
    is assumed to have a value of zero at the start of the observation period.
    A plot as base64 data is returned.
    """
    n_stocks = len(all_stocks_cip)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector["sector_name"].unique())
    print("Found {} unique sectors".format(n_unique_sectors))
    df = all_stocks_cip.merge(code_and_sector, left_index=True, right_on="asx_code")
    print(
        "Found {} stocks, {} sectors and merged total: {}".format(
            n_stocks, len(code_and_sector), len(df)
        )
    )
    # compute cumulative change in percent of each stock over the dates
    cumulative_pct_change = df.expanding(axis="columns").sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    # average each sector
    grouped_df = df.groupby("sector_name").mean()

    # ready the dataframe for plotting
    grouped_df = pd.melt(
        grouped_df,
        ignore_index=False,
        var_name="date",
        value_name="cumulative_change_percent",
    )
    grouped_df["sector"] = grouped_df.index
    grouped_df["date"] = pd.to_datetime(grouped_df["date"])
    n_col = 3
    plot = (
        p9.ggplot(
            grouped_df, p9.aes("date", "cumulative_change_percent", color="sector")
        )
        + p9.geom_line(size=1.0)
        + p9.facet_wrap(
            "~sector", nrow=n_unique_sectors // n_col + 1, ncol=n_col, scales="free_y"
        )
        + p9.xlab("")
        + p9.ylab("Average sector change (%)")
        + p9.theme(
            axis_text_x=p9.element_text(angle=30, size=6),
            axis_text_y=p9.element_text(size=6),
            figure_size=(12, 6),
            panel_spacing=0.3,
            legend_position="none",
        )
    )
    return plot_as_inline_html_data(plot)
def intensity_TL(Data):
    print('======= Creating intensity_TL =======')
    x = Data.Intensity[pd.isna(Data.Intensity)]
    if len(x) == len(Data):
        print("WARNING: All values for Intensity are NA's")
    else:
        # Filter symptoms and compute the mean intensity per date
        # (the deprecated nested-renamer .agg({'Intensity': 'mean'})
        # is replaced by a plain .mean())
        Symptomes = Data[(Data.Group == "sy")]
        tl3 = Symptomes.groupby("Date", as_index=False, sort=False)['Intensity'].mean()
        tl3['Date'] = pd.to_datetime(tl3['Date'])

        # Setting data with missing times
        sdate = min(tl3["Date"])  # start date
        edate = max(tl3["Date"])  # end date
        delta = edate - sdate  # as timedelta
        day = []
        for i in range(delta.days + 1):
            d = sdate + timedelta(days=i)
            day.append(d)
        DF = pd.DataFrame(day)
        DF.columns = ['Date']
        data_with_missing_times = pd.merge(DF, tl3, on='Date', how='outer')

        if delta.days > 1825:
            datebreaks = '18 months'
        elif delta.days > 1095:
            datebreaks = '12 months'
        else:
            datebreaks = '6 months'

        plot = (p9.ggplot(data=data_with_missing_times,
                          mapping=p9.aes(x='Date', y='Intensity'))
                + p9.geom_point(color='red', size=5)
                + p9.theme_classic()
                + p9.theme(axis_text=p9.element_text(size=40),
                           axis_title=p9.element_text(size=40, face='bold'))
                + p9.scale_x_datetime(date_labels='%Y-%m', date_breaks=datebreaks)
                + p9.labs(x='', y=''))

        # Creating and saving TL_3
        if len(data_with_missing_times) > 5:
            plot.save(filename='TL_3.jpeg', plot=plot,
                      path="pdf/iteration/", width=25, height=5, dpi=320)
        else:
            print('Plot not created; no data found.')
    return print('================================= intensity_TL DONE =============================')
def plot_paired_ranking(
    method1_summary_df,
    method2_summary_df,
    method1_name,
    method2_name,
    output_figure_filename,
):
    # Join dataframes to make sure the rows are aligned
    merged_summary_df = method1_summary_df.merge(
        method2_summary_df,
        left_index=True,
        right_index=True,
        suffixes=[f"_{method1_name}", f"_{method2_name}"],
    )

    # Format input dataframe
    plot_df = pd.DataFrame(
        data={
            "Method1 ranking": merged_summary_df[
                f"Percentile (simulated)_{method1_name}"
            ].values,
            "Method2 ranking": merged_summary_df[
                f"Percentile (simulated)_{method2_name}"
            ].values,
        },
        index=merged_summary_df.index,
    )

    fig = pn.ggplot(plot_df, pn.aes(x="Method1 ranking", y="Method2 ranking"))
    fig += pn.geom_point()
    fig += pn.labs(
        x=f"{method1_name} pathway ranking",
        y=f"{method2_name} pathway ranking",
        title=f"{method1_name} vs {method2_name} pathway ranking",
    )
    fig += pn.theme_bw()
    fig += pn.theme(
        legend_title_align="center",
        plot_background=pn.element_rect(fill="white"),
        legend_key=pn.element_rect(fill="white", colour="white"),
        legend_title=pn.element_text(family="sans-serif", size=15),
        legend_text=pn.element_text(family="sans-serif", size=12),
        plot_title=pn.element_text(family="sans-serif", size=15),
        axis_text=pn.element_text(family="sans-serif", size=12),
        axis_title=pn.element_text(family="sans-serif", size=15),
    )

    # Save figure
    fig.save(
        output_figure_filename,
        format="svg",
        bbox_inches="tight",
        transparent=True,
        pad_inches=0,
        dpi=300,
    )
    print(fig)
def plot_metrics_comparison_lineplot_grid(dataframe, models_labels, metrics_labels,
                                          figure_size=(14, 4)):
    """
    We define a function to plot the grid.
    """
    return (
        # Define the plot.
        p9.ggplot(dataframe, p9.aes(x='threshold', y='value', group='variable',
                                    color='variable', shape='variable'))
        # Add the points and lines.
        + p9.geom_point()
        + p9.geom_line()
        # Rename the x axis and give some space to left and right.
        + p9.scale_x_discrete(name='Threshold', expand=(0, 0.2))
        # Rename the y axis, give some space on top and bottom, and print
        # the tick labels with 2 decimal digits.
        + p9.scale_y_continuous(name='Value', expand=(0, 0.05),
                                labels=lambda l: ['{:.2f}'.format(x) for x in l])
        # Replace the names in the legend.
        + p9.scale_shape_discrete(name='Metric',
                                  labels=lambda l: [metrics_labels[x] for x in l])
        # Define the colors for the metrics for color-blind people.
        + p9.scale_color_brewer(name='Metric',
                                labels=lambda l: [metrics_labels[x] for x in l],
                                type='qual', palette='Set2')
        # Place the plots in a grid, renaming the labels for rows and columns.
        + p9.facet_grid('iterations ~ model',
                        labeller=p9.labeller(rows=lambda x: f'iters = {x}',
                                             cols=lambda x: f'{models_labels[x]}'))
        # Define the theme for the plot.
        + p9.theme(
            # Remove the y axis name.
            axis_title_y=p9.element_blank(),
            # Set the size of x and y tick labels font.
            axis_text_x=p9.element_text(size=7),
            axis_text_y=p9.element_text(size=7),
            # Place the legend on top, without title, and reduce the margin.
            legend_title=p9.element_blank(),
            legend_position='top',
            legend_box_margin=2,
            # Set the size for the figure.
            figure_size=figure_size,
        )
    )
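# Usage sketch for plot_metrics_comparison_lineplot_grid (illustrative; the
# long-format frame and the label dictionaries below are assumptions). The
# dataframe needs threshold/value/variable/iterations/model columns.
def _demo_metrics_grid():
    import pandas as pd
    rows = []
    for model in ["m1", "m2"]:
        for iters in [10, 20]:
            for threshold in ["0.1", "0.5", "0.9"]:
                for metric, base in [("precision", 0.8), ("recall", 0.6)]:
                    rows.append({"model": model, "iterations": iters,
                                 "threshold": threshold, "variable": metric,
                                 "value": base + float(threshold) / 10})
    df = pd.DataFrame(rows)
    return plot_metrics_comparison_lineplot_grid(
        df,
        models_labels={"m1": "Model 1", "m2": "Model 2"},
        metrics_labels={"precision": "Precision", "recall": "Recall"},
    )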
def __init__(self, base_size=11, base_family='DejaVu Sans'):
    theme_light.__init__(self, base_size, base_family)
    self.add_theme(theme(
        axis_ticks=element_line(color='#DDDDDD', size=0.5),
        panel_border=element_rect(fill='None', color='#838383', size=1),
        strip_background=element_rect(fill='#DDDDDD', color='#838383', size=1),
        strip_text_x=element_text(color='black'),
        strip_text_y=element_text(color='black', angle=-90),
    ), inplace=True)
def create(self, file_path: str) -> None: (ggplot(self._data, aes(x="pattern", y="count", label="fraction")) + geom_bar(stat="identity", fill="#1e4f79") + geom_text(va='bottom', size=24, format_string='{:.1%}') + scale_x_discrete(limits=self._data["pattern"]) + scale_y_continuous(labels=comma_format(), expand=[0.1, 0]) + ggtitle("Design Pattern Counts") + xlab("Design Pattern") + ylab("Count") + theme_classic(base_size=32, base_family="Helvetica") + theme(text=element_text(size=32), axis_text_x=element_text(rotation=45, ha="right"))).save( file_path, width=24, height=8)
def __init__(self, base_size=11, base_family='DejaVu Sans'):
    theme_light.__init__(self, base_size, base_family)
    self.add_theme(theme(
        axis_ticks=element_line(color='#DDDDDD', size=0.5),
        panel_border=element_rect(fill='None', color='#838383', size=1),
        strip_background=element_rect(fill='#DDDDDD', color='#838383', size=1),
        strip_text_x=element_text(color='black'),
        strip_text_y=element_text(color='black', angle=-90),
        legend_key=element_blank(),
    ), inplace=True)
def create(self, file_path: str) -> None: (ggplot(self._data, aes(x="category", y="count", label="percent")) + geom_bar(stat="identity", fill="#1e4f79") + geom_text(va='bottom', size=24) + scale_x_discrete(limits=self._data["category"]) + scale_y_continuous(labels=comma_format(), expand=[0.1, 0]) + ggtitle("Classes per Category") + xlab("Category") + ylab("Number of Classes") + theme_classic(base_size=32, base_family="Helvetica") + theme(text=element_text(size=32), axis_text_x=element_text(rotation=45, ha="right"))).save( file_path, width=7, height=7)
def __init__(self, base_size=11, base_family="DejaVu Sans"): theme_light.__init__(self, base_size, base_family) self.add_theme( theme( axis_ticks=element_line(color="#DDDDDD", size=0.5), panel_border=element_rect(fill="None", color="#838383", size=1), strip_background=element_rect(fill="#DDDDDD", color="#838383", size=1), strip_text_x=element_text(color="black"), strip_text_y=element_text(color="black", angle=-90), legend_key=element_blank(), ), inplace=True, )
def plot_boxplot_series(df, normalisation_method=None):
    """
    Treating each column as a separate boxplot and each row as an independent
    observation (ie. different company), render a series of box plots to
    identify a shift in performance from the observations.
    normalisation_method should be one of the values present in
    SectorSentimentSearchForm.normalisation_choices
    """
    # compute star performers: those who are above the mean on a given day
    # counted over all days
    count = defaultdict(int)
    avg = df.mean(axis=0)  # column means, computed once rather than per column
    for col in df.columns:
        winners = df[df[col] > avg[col]][col]
        for winner in winners.index:
            count[winner] += 1
    winner_results = []
    for asx_code, n_wins in count.items():
        x = df.loc[asx_code].sum()
        # avoid "dead cat bounce" stocks which fall spectacularly and then
        # post major increases in percentage terms
        if x > 0.0:
            winner_results.append((asx_code, n_wins, x))

    # and plot the normalised data
    if normalisation_method is None or normalisation_method == "1":
        normalized_df = df
        y_label = "Percentage change"
    elif normalisation_method == "2":
        normalized_df = (df - df.min()) / (df.max() - df.min())
        y_label = "Percentage change (min/max. scaled)"
    else:
        normalized_df = df / df.max(axis=0)  # div by max if all else fails...
        y_label = "Percentage change (normalised by dividing by max)"

    n_inches = len(df.columns) / 5
    melted = normalized_df.melt(ignore_index=False).dropna()
    plot = (
        p9.ggplot(melted, p9.aes(x="fetch_date", y="value"))
        + p9.geom_boxplot(outlier_colour="blue")
        + p9.theme(
            axis_text_x=p9.element_text(size=7),
            axis_text_y=p9.element_text(size=7),
            figure_size=(12, n_inches),
        )
        + p9.labs(x="Date (YYYY-MM-DD)", y=y_label)
        + p9.coord_flip()
    )
    return (
        plot_as_inline_html_data(plot),
        list(reversed(sorted(winner_results, key=lambda t: t[2]))),
    )
def plot_market_wide_sector_performance(all_dates, field_name='change_in_percent'):
    """
    Display specified dates for average sector performance. Each company
    is assumed to have a value of zero at the start of the observation period.
    A plot as base64 data is returned.
    """
    df = company_prices(None, all_dates=all_dates,
                        fields=field_name)  # None == all stocks
    n_stocks = len(df)
    # merge in sector information for each company
    code_and_sector = stocks_by_sector()
    n_unique_sectors = len(code_and_sector['sector_name'].unique())
    print("Found {} unique sectors".format(n_unique_sectors))
    df = df.merge(code_and_sector, left_on='asx_code', right_on='asx_code')
    print("Found {} stocks, {} sectors and merged total: {}".format(
        n_stocks, len(code_and_sector), len(df)))
    # compute cumulative change in percent of each stock over the dates
    cumulative_pct_change = df.expanding(axis='columns').sum()
    # merge date-wise into df
    for date in cumulative_pct_change.columns:
        df[date] = cumulative_pct_change[date]
    # average each sector
    grouped_df = df.groupby('sector_name').mean()
    # ready the dataframe for plotting
    grouped_df = pd.melt(grouped_df, ignore_index=False,
                         var_name='date',
                         value_name='cumulative_change_percent')
    grouped_df['sector'] = grouped_df.index
    grouped_df['date'] = pd.to_datetime(grouped_df['date'])
    n_col = 3
    plot = (p9.ggplot(grouped_df,
                      p9.aes('date', 'cumulative_change_percent', color='sector'))
            + p9.geom_line(size=1.0)
            + p9.facet_wrap('~sector', nrow=n_unique_sectors // n_col + 1,
                            ncol=n_col, scales='free_y')
            + p9.xlab('')
            + p9.ylab('Average sector change (%)')
            + p9.theme(axis_text_x=p9.element_text(angle=30, size=6),
                       axis_text_y=p9.element_text(size=6),
                       figure_size=(12, 6),
                       panel_spacing=0.3,
                       legend_position='none'))
    return plot_as_inline_html_data(plot)
def create(self, file_path: str) -> None: (ggplot(self._data, aes("value")) + geom_histogram(bins=100, fill="#1e4f79") + facet_wrap(facets="variable", scales="free", ncol=3) + xlim(0, 1) + scale_y_continuous(labels=comma_format()) + ggtitle("Intensity of Design Pattern Use") + xlab("Percentage of Classes Participating in Design Pattern") + ylab("Number of Projects") + theme_classic(base_size=32, base_family="Helvetica") + theme(text=element_text(size=32), axis_title_y=element_text(margin={"r": 40}), subplots_adjust={ "wspace": 0.3, "hspace": 0.5 })).save(file_path, width=24, height=24)
def plot_preprocessing_boxplot_bymodel(dataframe, models_labels, metrics_labels,
                                       groups_labels, figure_size=(14, 4)):
    """
    We define a function to plot the grid.
    """
    return (
        # Define the plot.
        p9.ggplot(dataframe, p9.aes(x='variable', y='value', fill='group'))
        # Add the boxplots.
        + p9.geom_boxplot(position='dodge')
        # Rename the x axis.
        + p9.scale_x_discrete(name='Metric',
                              labels=lambda l: [metrics_labels[x] for x in l])
        # Rename the y axis.
        + p9.scale_y_continuous(
            name='Value',
            expand=(0, 0.05),
            # breaks=[-0.25, 0, 0.25, 0.5, 0.75, 1],
            limits=[-0.25, 1],
            labels=lambda l: ['{:.2f}'.format(x) for x in l])
        # Define the colors for the metrics for color-blind people.
        + p9.scale_fill_brewer(name='Group',
                               labels=lambda l: [groups_labels[x] for x in l],
                               type='qual', palette='Set2')
        # Place the plots in a grid, renaming the labels.
        + p9.facet_grid(
            'model ~ .',
            scales='free_y',
            labeller=p9.labeller(rows=lambda x: f'{models_labels[x]}'))
        # Define the theme for the plot.
        + p9.theme(
            # Remove the x and y axis names.
            axis_title_x=p9.element_blank(),
            axis_title_y=p9.element_blank(),
            # Set the size of x and y tick labels font.
            axis_text_x=p9.element_text(size=7),
            axis_text_y=p9.element_text(size=7),
            # Place the legend on top, without title, and reduce the margin.
            legend_title=p9.element_blank(),
            legend_position='top',
            legend_box_margin=2,
            # Set the size for the figure.
            figure_size=figure_size,
        )
    )
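# Usage sketch for plot_preprocessing_boxplot_bymodel (illustrative; the
# frame and label dictionaries below are assumptions, not original data).
def _demo_preprocessing_boxplot():
    import numpy as np
    import pandas as pd
    rng = np.random.default_rng(1)
    df = pd.DataFrame({
        "model": np.repeat(["m1", "m2"], 60),
        "variable": np.tile(np.repeat(["precision", "recall"], 30), 2),
        "group": np.tile(["raw", "clean"], 60),
        "value": rng.uniform(0, 1, 120),
    })
    return plot_preprocessing_boxplot_bymodel(
        df,
        models_labels={"m1": "Model 1", "m2": "Model 2"},
        metrics_labels={"precision": "Precision", "recall": "Recall"},
        groups_labels={"raw": "Raw", "clean": "Cleaned"},
    )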
def bsuite_bar_plot(df_in: pd.DataFrame,
                    sweep_vars: Sequence[str] = None) -> gg.ggplot:
    """Output bar plot of bsuite data."""
    df = _clean_bar_plot_data(df_in, sweep_vars)
    p = (gg.ggplot(df)
         + gg.aes(x='env', y='score', colour='type', fill='type')
         + gg.geom_bar(position='dodge', stat='identity')
         + gg.geom_hline(yintercept=1., linetype='dashed', alpha=0.5)
         + gg.scale_colour_manual(plotting.CATEGORICAL_COLOURS)
         + gg.scale_fill_manual(plotting.CATEGORICAL_COLOURS)
         + gg.xlab('experiment')
         + gg.theme(axis_text_x=gg.element_text(angle=25, hjust=1))
         )
    if not all(df.finished):  # add a layer of alpha for unfinished jobs
        p += gg.aes(alpha='finished')
        p += gg.scale_alpha_discrete(range=[0.3, 1.0])

    # Compute the necessary size of the plot
    if sweep_vars:
        p += gg.facet_wrap(sweep_vars, labeller='label_both', ncol=1)
        n_hypers = df[sweep_vars].drop_duplicates().shape[0]
    else:
        n_hypers = 1
    return p + gg.theme(figure_size=(14, 3 * n_hypers + 1))
def theme_cognoma(fontsize_mult=1):
    return (gg.theme_bw(base_size=14 * fontsize_mult) +
            gg.theme(
                line=gg.element_line(color="#4d4d4d"),
                rect=gg.element_rect(fill="white", color=None),
                text=gg.element_text(color="black"),
                axis_ticks=gg.element_line(color="#4d4d4d"),
                legend_key=gg.element_rect(color=None),
                panel_border=gg.element_rect(color="#4d4d4d"),
                panel_grid=gg.element_line(color="#b3b3b3"),
                panel_grid_major_x=gg.element_blank(),
                panel_grid_minor=gg.element_blank(),
                strip_background=gg.element_rect(fill="#FEF2E2", color="#4d4d4d"),
                axis_text=gg.element_text(size=12 * fontsize_mult, color="#4d4d4d"),
                axis_title_x=gg.element_text(size=13 * fontsize_mult, color="#4d4d4d"),
                axis_title_y=gg.element_text(size=13 * fontsize_mult, color="#4d4d4d"),
            ))
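# Usage sketch: apply theme_cognoma to a quick scatter plot (illustrative;
# the demo data are assumptions, and `gg` is assumed to be a module-level
# `import plotnine as gg`, as theme_cognoma above relies on).
def _demo_theme_cognoma():
    import pandas as pd
    df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [2, 1, 4, 3]})
    return (gg.ggplot(df, gg.aes(x="x", y="y"))
            + gg.geom_point()
            + theme_cognoma())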
def plot_zmw_stats(self, **kwargs):
    """Plot of ZMW stats for all runs.

    Note
    ----
    Raises an error if :meth:`Summaries.has_zmw_stats` is not `True`.

    Parameters
    ----------
    ``**kwargs`` : dict
        Keyword arguments passed to :meth:`Summaries.zmw_stats`.

    Returns
    -------
    plotnine.ggplot.ggplot
        Stacked bar graph of ZMW stats for each run.

    """
    df = self.zmw_stats(**kwargs)
    p = (p9.ggplot(df, p9.aes(x='name', y='number', fill='status'))
         + p9.geom_col(position=p9.position_stack(reverse=True), width=0.8)
         + p9.theme(axis_text_x=p9.element_text(angle=90, vjust=1, hjust=0.5),
                    figure_size=(0.4 * len(df['name'].unique()), 2.5))
         + p9.ylab('number of ZMWs')
         + p9.xlab('')
         )
    if len(df['status'].unique()) < len(CBPALETTE):
        p = p + p9.scale_fill_manual(CBPALETTE[1:])
    return p
def plot_categ_spatial(mod, adata, sample_col, color, n_columns=2,
                       figure_size=(24, 5.7), point_size=0.8, text_size=9):
    # copy to avoid SettingWithCopy warnings when adding columns below
    for_plot = adata.obs[["imagecol", "imagerow", sample_col]].copy()
    for_plot["color"] = color

    # fix types
    for_plot["color"] = pd.Categorical(for_plot["color"], ordered=True)
    # for_plot['color'] = pd.to_numeric(for_plot['color'])
    for_plot["sample"] = pd.Categorical(for_plot[sample_col], ordered=False)
    for_plot["imagecol"] = pd.to_numeric(for_plot["imagecol"])
    for_plot["imagerow"] = -pd.to_numeric(for_plot["imagerow"])

    ax = (
        plotnine.ggplot(for_plot,
                        plotnine.aes(x="imagecol", y="imagerow", color="color"))
        + plotnine.geom_point(size=point_size)
        # + plotnine.scale_color_cmap()
        + plotnine.coord_fixed()
        + plotnine.theme_bw()
        + plotnine.theme(
            panel_background=plotnine.element_rect(
                fill="black", colour="black", size=0, linetype="solid"),
            panel_grid_major=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            panel_grid_minor=plotnine.element_line(
                size=0, linetype="solid", colour="black"),
            strip_text=plotnine.element_text(size=text_size),
        )
        + plotnine.facet_wrap("~sample", ncol=n_columns)
        + plotnine.theme(figure_size=figure_size)
    )
    return ax
def theme_cognoma(fontsize_mult=1):
    import plotnine as gg
    return (gg.theme_bw(base_size=14 * fontsize_mult) +
            gg.theme(
                line=gg.element_line(color="#4d4d4d"),
                rect=gg.element_rect(fill="white", color=None),
                text=gg.element_text(color="black"),
                axis_ticks=gg.element_line(color="#4d4d4d"),
                legend_key=gg.element_rect(color=None),
                panel_border=gg.element_rect(color="#4d4d4d"),
                panel_grid=gg.element_line(color="#b3b3b3"),
                panel_grid_major_x=gg.element_blank(),
                panel_grid_minor=gg.element_blank(),
                strip_background=gg.element_rect(fill="#FEF2E2", color="#4d4d4d"),
                axis_text=gg.element_text(size=12 * fontsize_mult, color="#4d4d4d"),
                axis_title_x=gg.element_text(size=13 * fontsize_mult, color="#4d4d4d"),
                axis_title_y=gg.element_text(size=13 * fontsize_mult, color="#4d4d4d"),
            ))
def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
    if self.y_max is not None:
        limits = [0, float(self.y_max)]
        eprint(f'Setting limits to: {limits}')
    else:
        limits = [0, 1]
    if expo:
        if os.path.exists('data/external/all_human_gameplay.json') and not self.no_humans:
            with open('data/external/all_human_gameplay.json') as f:
                all_gameplay = json.load(f)
                frames = []
                for event, name in [('parents', 'Intermediate'),
                                    ('maryland', 'Expert'),
                                    ('live', 'National')]:
                    if self.merge_humans:
                        name = 'Human'
                    gameplay = all_gameplay[event]
                    if event != 'live':
                        # cumulative human accuracy on the regular test questions
                        control_correct_positions = gameplay['control_correct_positions']
                        control_wrong_positions = gameplay['control_wrong_positions']
                        control_positions = control_correct_positions + control_wrong_positions
                        control_positions = np.array(control_positions)
                        control_result = np.array(
                            len(control_correct_positions) * [1]
                            + len(control_wrong_positions) * [0])
                        argsort_control = np.argsort(control_positions)
                        control_x = control_positions[argsort_control]
                        control_sorted_result = control_result[argsort_control]
                        control_y = control_sorted_result.cumsum() / control_sorted_result.shape[0]
                        control_df = pd.DataFrame({'correct': control_y, 'char_percent': control_x})
                        control_df['Dataset'] = 'Regular Test'
                        control_df['Guessing_Model'] = f' {name}'
                        frames.append(control_df)

                    # cumulative human accuracy on the IR adversarial questions
                    adv_correct_positions = gameplay['adv_correct_positions']
                    adv_wrong_positions = gameplay['adv_wrong_positions']
                    adv_positions = adv_correct_positions + adv_wrong_positions
                    adv_positions = np.array(adv_positions)
                    adv_result = np.array(
                        len(adv_correct_positions) * [1]
                        + len(adv_wrong_positions) * [0])
                    argsort_adv = np.argsort(adv_positions)
                    adv_x = adv_positions[argsort_adv]
                    adv_sorted_result = adv_result[argsort_adv]
                    adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                    adv_df = pd.DataFrame({'correct': adv_y, 'char_percent': adv_x})
                    adv_df['Dataset'] = 'IR Adversarial'
                    adv_df['Guessing_Model'] = f' {name}'
                    frames.append(adv_df)

                    if len(gameplay['advneural_correct_positions']) > 0:
                        # cumulative human accuracy on the RNN adversarial questions
                        adv_correct_positions = gameplay['advneural_correct_positions']
                        adv_wrong_positions = gameplay['advneural_wrong_positions']
                        adv_positions = adv_correct_positions + adv_wrong_positions
                        adv_positions = np.array(adv_positions)
                        adv_result = np.array(
                            len(adv_correct_positions) * [1]
                            + len(adv_wrong_positions) * [0])
                        argsort_adv = np.argsort(adv_positions)
                        adv_x = adv_positions[argsort_adv]
                        adv_sorted_result = adv_result[argsort_adv]
                        adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                        adv_df = pd.DataFrame({'correct': adv_y, 'char_percent': adv_x})
                        adv_df['Dataset'] = 'RNN Adversarial'
                        adv_df['Guessing_Model'] = f' {name}'
                        frames.append(adv_df)

                human_df = pd.concat(frames)
                human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
                human_dtype = CategoricalDtype(human_vals, ordered=True)
                human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
                dataset_dtype = CategoricalDtype(
                    ['Regular Test', 'IR Adversarial', 'RNN Adversarial'],
                    ordered=True)
                human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            p = ggplot(human_df) + geom_point(shape='.')
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

        if os.path.exists('data/external/all_human_gameplay.json') and not self.no_humans:
            eprint('Loading human data')
            p = p + geom_line(data=human_df)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        if not no_models:
            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg', se=False,
                                    method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se, bins=20,
                                         shape='.', linetype='None', size=0.5)
        else:
            chart = None

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                # legend_position='top',
                legend_box_margin=0,
                legend_title=element_blank(),
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5}),
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'],
                                 name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)
        return p
    else:
        if self.save_df is not None:
            eprint(f'Saving df to: {self.save_df}')
            # was `df.to_json(...)`, but `df` is undefined in this branch
            self.char_plot_df.to_json(self.save_df)
        return (
            ggplot(self.char_plot_df)
            + aes(x='char_percent', y='correct', color='Guessing_Model')
            + stat_smooth(method='mavg', se=False, method_args={'window': 500})
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + coord_cartesian(ylim=limits)
        )