def actualPred(y_true, y_pred):
    """Draw the ground truth and the predictions as two lines on one chart.

    The two inputs are placed side by side against a positional counter,
    reshaped to long format, and printed as a plotnine line plot with one
    coloured line per series.

    Parameters
    ----------
    y_true : Series or vector
        The ground truth
    y_pred : Series or vector
        Predicted values
    """
    n_obs = y_true.shape[0]
    wide = pd.DataFrame({
        "date": range(n_obs),
        "Actual": y_true,
        "Prediction": y_pred,
    })
    # Long format: one row per (date, series) pair, so colour can map to
    # the series name held in the 'variable' column.
    long_form = wide.melt(id_vars=['date'],
                          value_vars=['Actual', 'Prediction'])

    lines = geom_line(aes(y='value', color='variable'))
    axis_labels = labs(x='date', y='Solar Output')
    chart = (ggplot(long_form, aes(x='date'))
             + lines
             + axis_labels
             + plotnine.theme_538()
             + plotnine.theme(figure_size=(10, 6)))
    print(chart)
def plot_score(df, plot_fn):
    """Save a box plot of ``score`` per ``emotion_cat`` to ``plot_fn``.

    :param df: DataFrame providing the ``emotion_cat`` and ``score`` columns
    :param plot_fn: Destination passed straight to the plot's ``save``
    """
    legend_and_size = p9.theme(legend_position="top",
                               legend_direction="horizontal",
                               figure_size=(10, 5))
    backdrop = p9.theme(plot_background=p9.element_rect(
        fill=BG_COLOR, color=BG_COLOR, size=1))

    chart = p9.ggplot(df, p9.aes(x="emotion_cat", y="score"))
    chart = chart + p9.geom_boxplot()
    chart = chart + p9.labs(x="Model", y="EMOTION FEEL Score")
    # Theme overrides are added after the base theme, in the same order
    # as before, so they take effect on top of it.
    chart = chart + p9.theme_538()
    chart = chart + legend_and_size
    chart = chart + backdrop
    chart.save(plot_fn)
def scatterplot(cls, df):
    """Save faceted mean/confidence-interval scatterplots of the ROI table.

    Rows whose ``index`` is 'Overall' or 'No ROI' are dropped, the remaining
    rows are sorted by ``Mean`` within each (table_cols, table_rows) group,
    and one PNG per requested ordering is written to
    ``Figures/Scatterplots``.

    :param cls: Unused here; kept for the existing calling convention
    :param df: DataFrame with at least ``index``, ``Mean``, ``Conf_Int_95``
        and the columns named by ``config.table_cols`` /
        ``config.table_rows``
    """
    Utils.check_and_make_dir("Figures/Scatterplots")

    # Remove No ROI and Overall rows
    df = df[(df['index'] != 'Overall') & (df['index'] != 'No ROI')]
    # Group by parameters and sort
    df = df.groupby([config.table_cols, config.table_rows]).apply(
        lambda x: x.sort_values(['Mean']))
    # Reset index to remove grouping
    df = df.reset_index(drop=True)

    scatterplots = ['roi_ordered', 'stat_ordered']
    if config.table_row_order == 'roi':
        # Was remove('stat'), which is never in the list and therefore
        # raised ValueError whenever this branch ran.
        scatterplots.remove('stat_ordered')
    elif config.table_row_order == 'statorder':
        scatterplots.remove('roi_ordered')

    for plot_kind in scatterplots:
        if config.verbose:
            print(f"Saving {plot_kind} scatterplot!")

        if plot_kind == 'roi_ordered':
            # Order rows based on first facet
            roi_ord = pd.Categorical(df['index'],
                                     categories=df['index'].unique())
        else:
            # Order each facet individually.
            # NOTE(review): the grouping columns are hard-coded here while
            # the facets use config.table_cols/table_rows - confirm this is
            # intended.
            roi_ord = pd.Categorical(df.groupby(['MB', 'SENSE']).cumcount())

        figure_table = (
            pltn.ggplot(df, pltn.aes(x="Mean", y=roi_ord))
            + pltn.geom_point(na_rm=True, size=1)
            + pltn.geom_errorbarh(
                pltn.aes(xmin="Mean-Conf_Int_95", xmax="Mean+Conf_Int_95"),
                na_rm=True, height=None)
            + pltn.xlim(0, None)
            + pltn.scale_y_discrete(labels=[])
            + pltn.ylab(config.table_y_label)
            + pltn.xlab(config.table_x_label)
            + pltn.facet_grid('{rows}~{cols}'.format(rows=config.table_rows,
                                                     cols=config.table_cols),
                              drop=True, labeller="label_both")
            + pltn.theme_538()  # Set theme
            + pltn.theme(
                panel_grid_major_y=pltn.themes.element_line(alpha=0),
                panel_grid_major_x=pltn.themes.element_line(alpha=1),
                panel_background=pltn.element_rect(fill="gray", alpha=0.1),
                dpi=config.plot_dpi))

        figure_table.save(
            f"Figures/Scatterplots/{plot_kind}_scatterplot.png",
            height=config.plot_scale,
            width=config.plot_scale * 3,
            verbose=False,
            limitsize=False)
def plot_rank_full(df, plot_fn):
    """Save a stacked bar chart of ``ratio`` per ``emotion_cat`` to ``plot_fn``.

    Bars are filled by ``factor(rank)`` and the chart is split into one
    panel per value of ``cluster_labels_6``.

    :param df: DataFrame providing ``emotion_cat``, ``ratio``, ``rank`` and
        ``cluster_labels_6``
    :param plot_fn: Destination passed straight to the plot's ``save``
    """
    mapping = p9.aes(x="emotion_cat", y="ratio", fill="factor(rank)")
    legend_and_size = p9.theme(legend_position="top",
                               legend_direction="horizontal",
                               figure_size=(10, 5))
    backdrop_and_ticks = p9.theme(
        plot_background=p9.element_rect(fill=BG_COLOR, color=BG_COLOR,
                                        size=1),
        axis_text_x=p9.element_text(rotation=45, hjust=1))

    chart = p9.ggplot(df, mapping)
    chart = chart + p9.geom_bar(stat="identity")
    chart = chart + p9.facet_wrap("cluster_labels_6")
    chart = chart + p9.labs(x="Model", y="Proportion (%)", fill="Rank")
    chart = chart + p9.theme_538()
    chart = chart + legend_and_size
    chart = chart + backdrop_and_ticks
    chart.save(plot_fn)
def scatter(y_true, y_pred):
    """Plot actual vs predicted scatterplot

    Parameters
    ----------
    y_true : Series or vector
        The ground truth
    y_pred : Series or vector
        Predicted values
    """
    datum = pd.DataFrame({"Actual": y_true, "Prediction": y_pred})
    p = (
        ggplot(datum, aes(x='Actual', y="Prediction"))
        # A fixed colour is a geom parameter, not an aesthetic mapping.
        # Inside aes() the quoted literal was treated as a one-level
        # category, which drew the default palette colour plus a legend.
        + geom_point(color='#9B59B6')  # scatter points
        + labs(x='Actual', y='Prediction')
        + plotnine.theme_538()
        + plotnine.theme(figure_size=(10, 6)))
    print(p)
def get_lag_corr(y_actual, y_pred, num_lags):
    """Compute and plot the rank correlation at successive lags

    For every lag in ``range(num_lags)`` the predictions are shifted by that
    many positions and their Spearman coefficient against the ground truth
    is taken, with NaN pairs omitted. The coefficients are then printed as
    a line plot.

    Parameters
    ----------
    y_actual : Series or vector
        The ground truth
    y_pred : Series or vector
        Predicted values
    num_lags : int
        Lag to consider - range (0, num_lags)
    """
    coefficients = [
        # [0] is the correlation coefficient; the p-value is discarded.
        scipy.stats.spearmanr(pd.Series(y_pred).shift(lag), y_actual,
                              nan_policy='omit')[0]
        for lag in range(num_lags)
    ]
    datum = pd.DataFrame({
        "Lags": range(len(coefficients)),
        "Lag-Coefficient": coefficients,
    })

    chart = ggplot(datum, aes(x='Lags'))
    chart = chart + geom_line(aes(y='Lag-Coefficient'))
    chart = chart + labs(x='Lag', y='Coefficient')
    chart = chart + plotnine.theme_538()
    chart = chart + plotnine.theme(figure_size=(10, 6))
    print(chart)
def test_theme_538(self):
    """Apply ``theme_538`` to the shared plot and check it as 'theme_538'."""
    titled = self.g + labs(title='Theme 538')
    themed = titled + theme_538()
    # NOTE(review): comparing a plot to a string presumably relies on an
    # overloaded ``==`` provided by the test harness - confirm.
    assert themed + _theme == 'theme_538'
def histogram_make(roi, combined_raw_df, list_rois, config, xlimit,
                   save_function, find_xlim_function):
    """Build a faceted histogram of ``voxel_value`` for one ROI and save it.

    :param roi: Index into ``list_rois`` selecting the ROI name
    :param combined_raw_df: DataFrame to plot; must contain ``voxel_value``
        (and ``stat_value`` / ``Statistic`` when mean or median lines are
        enabled in ``config``)
    :param list_rois: Sequence of ROI names
    :param config: Settings object read for bin width, colours, facet
        variables, labels, dpi and the axis-limit mode
    :param xlimit: Upper x limit to apply; a falsy value (0) leaves the
        upper limit open
    :param save_function: Called as
        ``save_function(figure, roi_name, config, folder, 'histogram')``
    :param find_xlim_function: Called as
        ``find_xlim_function(roi_name, figure, 'xaxis')`` to get the limit
        of the plot
    :return: ``None`` when ``combined_raw_df`` is empty; otherwise the value
        from ``find_xlim_function``, or 0 when it was not called
    """
    if combined_raw_df.empty:
        if config.verbose:
            print('INFO: Histograms cannot be made for the No ROI category.')
        return None

    thisroi = list_rois[roi]

    figure = (
        pltn.ggplot(combined_raw_df, pltn.aes(x="voxel_value"))
        + pltn.theme_538()
        # Boundary centers the bars, na_rm cancels error from setting an
        # xlimit
        + pltn.geom_histogram(binwidth=config.histogram_binwidth,
                              fill=config.histogram_fig_colour,
                              boundary=0,
                              na_rm=True)
        + pltn.facet_grid(
            f"{config.histogram_fig_y_facet}~{config.histogram_fig_x_facet}",
            drop=True, labeller="label_both")
        + pltn.labs(x=config.histogram_fig_label_x,
                    y=config.histogram_fig_label_y)
        + pltn.theme(
            panel_grid_minor_x=pltn.themes.element_line(alpha=0),
            panel_grid_major_x=pltn.themes.element_line(alpha=1),
            panel_grid_major_y=pltn.element_line(alpha=0),
            plot_background=pltn.element_rect(fill="white"),
            panel_background=pltn.element_rect(fill="gray", alpha=0.1),
            axis_title_x=pltn.element_text(weight='bold', color='black',
                                           size=20),
            axis_title_y=pltn.element_text(weight='bold', color='black',
                                           size=20),
            strip_text_x=pltn.element_text(weight='bold', size=10,
                                           color='black'),
            strip_text_y=pltn.element_text(weight='bold', size=10,
                                           color='black'),
            axis_text_x=pltn.element_text(size=10, color='black'),
            axis_text_y=pltn.element_text(size=10, color='black'),
            dpi=config.plot_dpi))

    # Display mean or median as vertical lines on plot
    if config.histogram_show_mean or config.histogram_show_median:
        figure += pltn.geom_vline(
            pltn.aes(xintercept="stat_value", color="Statistic"),
            size=config.histogram_stat_line_size)
        figure += pltn.scale_color_manual(values=[
            config.colorblind_friendly_plot_colours[3],
            config.colorblind_friendly_plot_colours[1]
        ])

        # Hide the mean/median legend when it is not wanted. Without the
        # lines above no colour is mapped, so there is no legend to hide.
        if not config.histogram_show_legend:
            figure += pltn.theme(legend_position='none')

    if xlimit:
        # Set x limit of figure (used to make it the same for every
        # histogram)
        figure += pltn.xlim(-1, xlimit)
        thisroi += '_same_xlim'
    else:
        figure += pltn.xlim(-1, None)

    returned_xlim = 0
    if (config.use_same_axis_limits in ('Same limits', 'Create both')
            and xlimit == 0):
        returned_xlim = find_xlim_function(thisroi, figure, 'xaxis')

    if config.use_same_axis_limits == 'Same limits' and xlimit == 0:
        # First pass of 'Same limits': only the limit is needed, nothing
        # is saved.
        return returned_xlim
    elif xlimit != 0:
        folder = 'Same_xaxis'
    else:
        folder = 'Different_xaxis'

    # Suppress Pandas warning about alignment of non-concatenation axis.
    # catch_warnings restores the caller's filters on exit (even if saving
    # raises) instead of forcing FutureWarning back to 'default'.
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=FutureWarning)
        save_function(figure, thisroi, config, folder, 'histogram')

    return returned_xlim
def barchart_make(roi, df, list_rois, config, ylimit, save_function,
                  find_ylim_function):
    """Build a dodged bar chart with error bars for one ROI and save it.

    :param roi: Index into ``list_rois`` selecting the ROI name
    :param df: DataFrame with ``index``, ``Mean``, ``Conf_Int_95`` and the
        columns named by ``config.single_roi_fig_x_axis`` /
        ``config.single_roi_fig_colour``
    :param list_rois: Sequence of ROI names
    :param config: Settings object read for axis/fill columns, labels,
        colours, dpi and the axis-limit mode
    :param ylimit: Upper y limit to apply; a falsy value (0) leaves the
        axis unconstrained
    :param save_function: Called as
        ``save_function(figure, roi_name, config, folder, 'barchart')``
    :param find_ylim_function: Called as
        ``find_ylim_function(roi_name, figure, 'yaxis')`` to get the limit
        of the plot
    :return: The value from ``find_ylim_function``, or 0 when it was not
        called
    """
    x_col = config.single_roi_fig_x_axis
    thisroi = list_rois[roi]

    # Rows of this ROI, sorted on the x variable, with a fresh index.
    current_df = (df.loc[df['index'] == thisroi]
                  .sort_values([x_col])
                  .reset_index(drop=True))
    # Categorical in order of appearance keeps the sorted order on the axis.
    current_df[x_col] = pd.Categorical(
        current_df[x_col], categories=current_df[x_col].unique())

    def dodge():
        # Each layer gets its own position object, as before.
        return pltn.position_dodge(preserve='single', width=0.8)

    mapping = pltn.aes(x=x_col,
                       y='Mean',
                       ymin="Mean-Conf_Int_95",
                       ymax="Mean+Conf_Int_95",
                       fill=f'factor({config.single_roi_fig_colour})')
    big_black_bold = dict(weight='bold', color='black', size=20)
    overrides = pltn.theme(
        panel_grid_major_x=pltn.element_line(alpha=0),
        axis_title_x=pltn.element_text(**big_black_bold),
        axis_title_y=pltn.element_text(**big_black_bold),
        axis_text_y=pltn.element_text(size=20, color='black'),
        legend_title=pltn.element_text(size=20, color='black'),
        legend_text=pltn.element_text(size=18, color='black'),
        subplots_adjust={'right': 0.85},
        legend_position=(0.9, 0.8),
        dpi=config.plot_dpi)

    figure = (
        pltn.ggplot(current_df, mapping)
        + pltn.theme_538()
        + pltn.geom_col(position=dodge(), width=0.8, na_rm=True)
        + pltn.geom_errorbar(size=1, position=dodge())
        + pltn.labs(x=config.single_roi_fig_label_x,
                    y=config.single_roi_fig_label_y,
                    fill=config.single_roi_fig_label_fill)
        # Tick labels are blanked; the text layer below draws the x value
        # at y = -0.7 instead.
        + pltn.scale_x_discrete(labels=[])
        + overrides
        + pltn.geom_text(pltn.aes(y=-.7, label=x_col),
                         color='black', size=20, va='top')
        + pltn.scale_fill_manual(
            values=config.colorblind_friendly_plot_colours))

    if ylimit:
        # Set y limit of figure (used to make it the same for every
        # barchart)
        figure += pltn.ylim(None, ylimit)
        thisroi += '_same_ylim'

    no_limit_given = ylimit == 0
    mode = config.use_same_axis_limits

    returned_ylim = 0
    if no_limit_given and mode in ('Same limits', 'Create both'):
        returned_ylim = find_ylim_function(thisroi, figure, 'yaxis')

    if no_limit_given and mode == 'Same limits':
        # Only the limit is wanted on this pass; nothing is saved.
        return returned_ylim

    folder = 'Same_yaxis' if ylimit != 0 else 'Different_yaxis'
    save_function(figure, thisroi, config, folder, 'barchart')
    return returned_ylim
def plot_performance(df, report_year, eval_period):
    """
    Plot metric-specific performance for a set of stocks over time.

    Reference:
    https://www.buffettsbooks.com/how-to-invest-in-stocks/intermediate-course/lesson-20/

    The caller's DataFrame is not modified. Rows with any missing value in
    the selected columns are dropped before plotting.

    :param df: DataFrame containing stock tickers and the columns specified
        below
    :param report_year: Year of most recent financial report
    :param eval_period: Number of years prior to most recent report to be
        analyzed
    :return: A list of ggplot objects, one per metric; an empty list when no
        rows remain after filtering
    :rtype: List
    """
    start_year = report_year - eval_period
    wanted = [
        'symbol', 'year', 'eps', 'bookValuePerShare', 'roe', 'currentRatio',
        'debtToEquity'
    ]
    # Explicit copy: the column assignment below would otherwise be a
    # chained assignment on a slice of the caller's frame.
    df = df.loc[df['year'] >= start_year, wanted].copy()
    # 'roe' is scaled by 100 (presumably fraction -> percent, matching the
    # "above 8%" guidance in the label text).
    df['roe'] = df['roe'] * 100.0
    df = df.rename(
        {
            'eps': 'Earnings per Share',
            'roe': 'Return on Equity',
            'currentRatio': 'Current Ratio',
            'debtToEquity': 'Debt to Equity Ratio',
            'bookValuePerShare': 'Book Value per Share'
        },
        axis='columns')
    df = df.sort_values(by=['symbol', 'year'], ascending=True).dropna()

    if df.empty:
        # Nothing to plot; min()/max()/mean below would raise on no data.
        return []

    label_dict = {
        'Earnings per Share':
            'The EPS shows the company\'s profit per share. This chart '
            'should have a positive slope over time. Stable results '
            'here are extremely important for forecasting future cash '
            'flows. Note: if the company\'s book value has increased '
            'over time, the EPS should demonstrate similar growth.',
        # Commenting out for now, API no longer returning this col in
        # income-statement response
        # 'Dividend per Share': 'This chart shows the dividend history of the company. '
        #                       'This should have a flat to positive slope over time. If '
        #                       'you see a drastic drop, it may represent a stock split '
        #                       'for the company. Note: the dividend is taken from a '
        #                       'portion of the EPS, the remainder goes to the book value.',
        'Book Value per Share':
            'The book value represents the liquidation value of the '
            'entire company (per share). It\'s important to see '
            'this number increasing over time. If the company pays a'
            ' high dividend, the book value may grow at a slower '
            'rate. If the company pays no dividend, the book value '
            'should grow with the EPS each year.',
        'Return on Equity':
            'Return on equity is very important because it show the '
            'return that management has received for reinvesting the '
            'profits of the company. If using an intrinsic value '
            'calculator, it\'s very important that this number is flat or'
            ' increasing for accurate results. Find companies with a '
            'consistent ROE above 8%.',
        'Current Ratio':
            'The current ratio helps measure the health of the company in '
            'the short term. As a rule of thumb, the current ratio should be'
            ' above 1.0. A safe current ratio is typically above 1.5. Look '
            'for stability trends within the current ratio to see how the '
            'company manages their short term risk.',
        'Debt to Equity Ratio':
            'The debt to equity ratio helps measure the health of '
            'the company in the long term. As a rule of thumb, the '
            'debt to equity ratio should be lower than 0.5. Look for '
            'stability trends within the debt/equity ratio to see how'
            ' the company manages their long term risk.'
    }

    wrapper = textwrap.TextWrapper(width=120)
    label_dict = {
        key: wrapper.fill(text=value) for key, value in label_dict.items()
    }

    # Year axis values are the same for every metric; compute them once.
    year_breaks = range(int(df['year'].min()), int(df['year'].max()) + 1, 1)
    year_mid = statistics.mean((df['year']))

    plots = []
    cols = df.columns[2:].tolist()
    for metric in cols:
        lowest = int(df[metric].astype(int).min())
        highest = int(round(df[metric]).astype(int).max())
        p = (ggplot(df, aes('year', metric, color='symbol'))
             + geom_line(size=1, alpha=0.8)
             + geom_point(size=3, alpha=0.8)
             + labs(title=metric, x='Report Year', y='', color='Ticker')
             + theme_538()
             + theme(legend_position='left',
                     plot_title=element_text(weight='bold'))
             + scale_x_continuous(breaks=year_breaks)
             # Breaks run one unit past the data so the label box placed
             # at highest + 1 sits on a break.
             + scale_y_continuous(breaks=range(lowest, highest + 2, 1))
             + annotate(geom='label',
                        x=year_mid,
                        y=highest + 1,
                        label=label_dict[metric],
                        size=8,
                        label_padding=0.8,
                        fill='#F7F7F7'))
        plots.append(p)

    return plots