def get_facet_rows(self, data, facet_by): nrow = self.opts.get("facet_nrow", None) ncol = self.opts.get("facet_ncol", None) if not nrow and not ncol: nplots = len(data[facet_by].unique()) nrow, ncol = n2mfrow(nplots) return (nrow, ncol)
def plot(df: 'DataFrame', group_colname: str = None, time_colname: str = None, max_num_groups: int = 1, split_dt: Optional[np.datetime64] = None, **kwargs) -> 'DataFrame': """ :param df: The output of `.to_dataframe()`. :param group_colname: The name of the group-column. :param time_colname: The name of the time-column. :param max_num_groups: Max. number of groups to plot; if the number of groups in the dataframe is greater than this, a random subset will be taken. :param split_dt: If supplied, will draw a vertical line at this date (useful for showing pre/post validation). :param kwargs: Further keyword arguments to pass to `plotnine.theme` (e.g. `figure_size=(x,y)`) :return: A plot of the predicted and actual values. """ from plotnine import ( ggplot, aes, geom_line, geom_ribbon, facet_grid, facet_wrap, theme_bw, theme, ylab, geom_vline ) is_components = ('process' in df.columns and 'state_element' in df.columns) if group_colname is None: group_colname = 'group' if group_colname not in df.columns: raise TypeError("Please specify group_colname") if time_colname is None: time_colname = 'time' if 'time' not in df.columns: raise TypeError("Please specify time_colname") df = df.copy() if df[group_colname].nunique() > max_num_groups: subset_groups = df[group_colname].drop_duplicates().sample(max_num_groups).tolist() if len(subset_groups) < df[group_colname].nunique(): print("Subsetting to groups: {}".format(subset_groups)) df = df.loc[df[group_colname].isin(subset_groups), :] num_groups = df[group_colname].nunique() aes_kwargs = {'x': time_colname} if is_components: aes_kwargs['group'] = 'state_element' plot = ( ggplot(df, aes(**aes_kwargs)) + geom_line(aes(y='mean'), color='#4C6FE7', size=1.5, alpha=.75) + geom_ribbon(aes(ymin='lower', ymax='upper'), color=None, alpha=.25) + ylab("") ) if is_components: num_processes = df['process'].nunique() if num_groups > 1 and num_processes > 1: raise ValueError("Cannot plot components for > 1 group and > 1 processes.") elif num_groups == 1: plot = plot + facet_wrap(f"~ measure + process", scales='free_y', labeller='label_both') if 'figure_size' not in kwargs: from plotnine.facets.facet_wrap import n2mfrow nrow, _ = n2mfrow(len(df[['process', 'measure']].drop_duplicates().index)) kwargs['figure_size'] = (12, nrow * 2.5) else: plot = plot + facet_grid(f"{group_colname} ~ measure", scales='free_y', labeller='label_both') if 'figure_size' not in kwargs: kwargs['figure_size'] = (12, num_groups * 2.5) if (df.groupby('measure')['process'].nunique() <= 1).all(): plot = plot + geom_line(aes(y='mean', color='state_element'), size=1.5) else: if 'actual' in df.columns: plot = plot + geom_line(aes(y='actual')) if num_groups > 1: plot = plot + facet_grid(f"{group_colname} ~ measure", scales='free_y', labeller='label_both') else: plot = plot + facet_wrap("~measure", scales='free_y', labeller='label_both') if 'figure_size' not in kwargs: kwargs['figure_size'] = (12, 5) if split_dt: plot = plot + geom_vline(xintercept=np.datetime64(split_dt), linetype='dashed') return plot + theme_bw() + theme(**kwargs)