def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Pivot long-form data into the wide layout used for matplotlib bars.

    The result has the `x` values as its index, one column per value
    (combination) of `idx_cols`, and the `y` values as table entries.

    Parameters
    ----------
    df : pd.DataFrame
        long-form data
    x : str
        column whose values become the index of the result
    y : str
        column providing the table values
    idx_cols : str or list of str
        column(s) whose values become the columns of the result
    kwargs
        `dimension=order` pairs, where `dimension` must be the name of the
        resulting index or columns. If `order` is None, the order stored in
        `run_control()['order']` is applied where available.

    Raises
    ------
    ValueError
        if a key of `kwargs` names neither the index nor the columns
    """
    if not islistable(idx_cols):
        idx_cols = [idx_cols]
    idx_cols = idx_cols + [x]

    # each combination of index columns may occur only once
    is_duplicate = df[idx_cols].duplicated()
    if any(is_duplicate):
        _raise_data_error('Duplicates in plot data',
                          df.loc[is_duplicate, idx_cols])

    # long form -> wide form, with x along the index
    wide = df.set_index(idx_cols)[y].unstack(x).T

    # apply the requested (or run-control) ordering per dimension
    for key, order in kwargs.items():
        if wide.columns.name == key:
            axis, present = 'columns', wide.columns.values
        elif wide.index.name == key:
            axis, present = 'index', list(wide.index)
        else:
            raise ValueError(f'No dimension {key} in the data!')

        if order is None:
            known_orders = run_control()['order']
            if key in known_orders:
                # items listed in run control first, all others afterwards
                order = [i for i in known_orders[key] if i in present]
                order += [i for i in present if i not in order]

        wide = wide.reindex(**{axis: order})

    return wide
def categorize(self, name, value, criteria,
               color=None, marker=None, linestyle=None):
    """Assign the scenarios matching `criteria` to a category

    Parameters
    ----------
    name: str
        category column name
    value: str
        category identifier
    criteria: dict
        dictionary with variables mapped to applicable checks
        ('up' and 'lo' for respective bounds, 'year' for years - optional)
    color: str
        assign a color to this category for plotting
    marker: str
        assign a marker to this category for plotting
    linestyle: str
        assign a linestyle to this category for plotting
    """
    # register any given plotting style for this category in run control
    styles = {'color': color, 'marker': marker, 'linestyle': linestyle}
    for kind, arg in styles.items():
        if not arg:
            continue
        run_control().update({kind: {name: {value: arg}}})

    # select the scenarios for which all criteria hold
    matches = _apply_criteria(self.data, criteria,
                              in_range=True, return_test='all')
    idx = _meta_idx(matches)

    n_matches = len(idx)
    if n_matches == 0:
        logger().info("No scenarios satisfy the criteria")
        return  # nothing to categorize

    # write the category into the metadata dataframe
    self._new_meta_column(name, value)
    self.meta.loc[idx, name] = value

    plural = '' if n_matches == 1 else 's'
    logger().info(
        '{} scenario{} categorized as `{}: {}`'.format(
            n_matches, plural, name, value))
def __init__(self, data, **kwargs):
    """Initialize an instance of an IamDataFrame

    Parameters
    ----------
    data: ixmp.TimeSeries, ixmp.Scenario, pd.DataFrame or data file
        an instance of an TimeSeries or Scenario (requires `ixmp`),
        or pd.DataFrame or data file with IAMC-format data columns.

        Special support is provided for data files downloaded directly from
        IIASA SSP and RCP databases. If you run into any problems loading
        data, please make an issue at:
        https://github.com/IAMconsortium/pyam/issues
    kwargs
        passed on to the reader when `data` is not a pd.DataFrame
    """
    # pick the loader matching the type of `data`
    if isinstance(data, pd.DataFrame):
        loaded = format_data(data.copy())
    elif has_ix and isinstance(data, ixmp.TimeSeries):
        loaded = read_ix(data, **kwargs)
    else:
        loaded = read_files(data, **kwargs)
    self.data = loaded

    # metadata dataframe (categorization and other indicators),
    # one row per unique META_IDX combination
    unique_idx = self.data[META_IDX].drop_duplicates()
    self.meta = unique_idx.set_index(META_IDX)
    self.reset_exclude()

    # run user-defined code configured in run control, if any
    if 'exec' in run_control():
        self._execute_run_control()
def assign_style_props(df, color=None, marker=None, linestyle=None,
                       cmap=None):
    """Assign the style properties for a plot

    For each of `color`, `marker` and `linestyle` that names a column of
    `df`, every unique value of that column is mapped to a style taken from
    run control (if configured there) or from the default property cycle.

    Parameters
    ----------
    df : pd.DataFrame
        data to be used for style properties
    color, marker, linestyle : str, optional
        column names by which to assign the respective property
    cmap : str, optional
        colormap passed on to `default_props`

    Returns
    -------
    dict
        `{kind: {column value: style}}` for each kind that names a column
    """
    defaults = default_props(reset=True, num_colors=len(df), colormap=cmap)
    rc = run_control()

    props = {}
    requested = (('color', color), ('marker', marker),
                 ('linestyle', linestyle))
    for kind, var in requested:
        if var not in df.columns:
            continue

        use_rc = kind in rc and var in rc[kind]
        assigned = {}
        for val in df[var].unique():
            # always advance the cycle so that defaults stay the same
            # regardless of which values are overridden by run control
            fallback = next(defaults[kind])
            if use_rc and val in rc[kind][var]:
                assigned[val] = rc[kind][var][val]
            else:
                assigned[val] = fallback
        props[kind] = assigned

    return props
def assign_style_props(df, color=None, marker=None, linestyle=None, cmap=None):
    """Assign the style properties for a plot

    For each of `color`, `marker` and `linestyle` that names a column of
    `df`, every unique value of that column is mapped to a style taken from
    run control (if configured there) or from the default property cycle.
    Colors that are keys of `PYAM_COLORS` are translated to the color
    stored there.

    Parameters
    ----------
    df : pd.DataFrame
        data to be used for style properties
    color, marker, linestyle : str, optional
        column names by which to assign the respective property
    cmap : str, optional
        colormap for the default colors; requires `color`

    Returns
    -------
    dict
        `{kind: {column value: style}}` for each kind that names a column

    Raises
    ------
    ValueError
        if `cmap` is given without `color`
    """
    if color is None and cmap is not None:
        raise ValueError("`cmap` must be provided with the `color` argument")

    # number of default colors: one per unique value of the color column,
    # otherwise one per unique combination of the IAMC index columns present
    if color in df.columns:
        n = len(df[color].unique())
    else:
        idx_cols = list(set(df.columns) & set(IAMC_IDX))
        n = len(df[idx_cols].drop_duplicates())
    defaults = default_props(reset=True, num_colors=n, colormap=cmap)

    props = {}
    rc = run_control()

    kinds = [("color", color), ("marker", marker), ("linestyle", linestyle)]
    for kind, var in kinds:
        rc_has_kind = kind in rc
        if var in df.columns:
            rc_has_var = rc_has_kind and var in rc[kind]
            props_for_kind = {}
            for val in df[var].unique():
                if rc_has_var and val in rc[kind][var]:
                    props_for_kind[val] = rc[kind][var][val]
                    # cycle any way to keep defaults the same
                    next(defaults[kind])
                else:
                    props_for_kind[val] = next(defaults[kind])
            props[kind] = props_for_kind

    # translate special pyam color names (e.g. AR6-SSP2-45) to the proper
    # color designation. The check is done per value rather than through a
    # numpy array because the values may mix strings with color tuples,
    # which cannot be packed into one regular array.
    # NOTE(review): assumes the keys of PYAM_COLORS are strings - confirm
    if "color" in props:
        props["color"] = {
            key: (
                PYAM_COLORS[value]
                if isinstance(value, str) and value in PYAM_COLORS
                else value
            )
            for key, value in props["color"].items()
        }

    return props
def reshape_mpl(df, x, y, idx_cols, **kwargs):
    """Reshape data from long form to "bar plot form".

    Matplotlib requires x values as the index with one column for bar
    grouping. Table values come from y values.

    Parameters
    ----------
    df : pd.DataFrame
        long-form data
    x : str
        column whose values become the index of the result
    y : str
        column providing the table values
    idx_cols : str or list of str
        column(s) whose values become the columns of the result
    kwargs
        `dimension=order` pairs, where `dimension` must be the name of the
        resulting index, the resulting columns, or one level of the
        resulting column MultiIndex. If `order` is None, the order stored
        in `run_control()['order']` is applied where available.

    Raises
    ------
    ValueError
        if a key of `kwargs` is not a dimension of the reshaped data
    """
    # work on a copy so that appending x never alters the caller's list
    idx_cols = list(to_list(idx_cols))
    if x not in idx_cols:
        idx_cols.append(x)

    # check for duplicates
    rows = df[idx_cols].duplicated()
    if any(rows):
        _raise_data_error("Duplicates in plot data", df.loc[rows, idx_cols])

    # reshape the data
    df = df.set_index(idx_cols)[y].unstack(x).T

    # reindex to get correct order
    for key, value in kwargs.items():
        level = None
        if df.columns.name == key:  # single-dimension index
            axis, _values = "columns", df.columns.values
        elif df.index.name == key:  # single-dimension index
            axis, _values = "index", list(df.index)
        elif key in df.columns.names:  # several dimensions -> pd.MultiIndex
            axis, _values = "columns", get_index_levels(df.columns, key)
            level = key
        else:
            raise ValueError(f"No dimension {key} in the data!")

        # if not given, determine order based on run control (if possible)
        if value is None and key in run_control()["order"]:
            # select relevant items from run control, then add other cols
            value = [i for i in run_control()["order"][key] if i in _values]
            value += [i for i in _values if i not in value]
        df = df.reindex(**{axis: value, "level": level})

    return df
def _execute_run_control(self):
    """Call the user-defined functions listed in run control on this instance

    Each entry of `run_control()['exec']` provides a `file` and a list of
    `functions`. The file is imported as a module (its directory, if any,
    is added to `sys.path`) and every listed function is called with this
    instance as its only argument.
    """
    for module_block in run_control()['exec']:
        fname = module_block['file']
        functions = module_block['functions']

        # make the module importable; this method runs for every new
        # instance, so avoid adding the same directory over and over
        dirname = os.path.dirname(fname)
        if dirname and dirname not in sys.path:
            sys.path.append(dirname)

        # module name is the file name up to the first dot
        module = os.path.basename(fname).split('.')[0]
        mod = importlib.import_module(module)
        for func in functions:
            f = getattr(mod, func)
            f(self)
def __init__(self, data, **kwargs):
    """Initialize an instance of an IamDataFrame

    Parameters
    ----------
    data: ixmp.TimeSeries, ixmp.Scenario, pd.DataFrame or data file
        an instance of an TimeSeries or Scenario (requires `ixmp`),
        or pd.DataFrame or data file with IAMC-format data columns
    kwargs
        passed on to the reader when `data` is not a pd.DataFrame
    """
    # a DataFrame is copied and formatted, anything else is read from source
    from_frame = isinstance(data, pd.DataFrame)
    if from_frame:
        self.data = format_data(data.copy())
    else:
        from_ixmp = has_ix and isinstance(data, ixmp.TimeSeries)
        reader = read_ix if from_ixmp else read_files
        self.data = reader(data, **kwargs)

    # dataframe for categorization and other metadata indicators
    meta_keys = self.data[META_IDX].drop_duplicates()
    self.meta = meta_keys.set_index(META_IDX)
    self.reset_exclude()

    # execute user-defined code
    if 'exec' in run_control():
        self._execute_run_control()
def map_regions(self, map_col, agg=None, copy_col=None, fname=None,
                region_col=None, inplace=False):
    """Map the regions of the data to another regional resolution

    The mapping is read from a region mapping file. For every model, the
    `region` column is replaced by the matching entries of `map_col`.

    Parameters
    ----------
    map_col: string
        The column used to map new regions to. Common examples include
        iso and 5_region.
    agg: string, optional
        Perform a data aggregation. Options include: sum.
    copy_col: string, optional
        Copy the existing region data into a new column for later use.
    fname: string, optional
        Use a non-default region mapping file
    region_col: string, optional
        Use a non-default column name for regions to map from
        (default: '<model>.REGION').
    inplace : bool, default False
        if True, do operation inplace and return None
    """
    models = self.meta.index.get_level_values('model').unique()
    fname = fname or run_control()['region_mapping']['default']
    mapping = read_pandas(fname).rename(str.lower, axis='columns')
    map_col = map_col.lower()

    ret = copy.deepcopy(self) if not inplace else self
    _df = ret.data
    columns_orderd = _df.columns

    # merge data
    dfs = []
    for model in models:
        df = _df[_df['model'] == model]
        _col = region_col or '{}.REGION'.format(model)
        _map = mapping.rename(columns={_col.lower(): 'region'})
        _map = _map[['region', map_col]].dropna().drop_duplicates()

        if copy_col is not None:
            # `df` is a filtered selection; create a new frame instead of
            # assigning a column on the selection
            df = df.assign(**{copy_col: df['region']})

        df = (df.merge(_map, on='region')
              .drop('region', axis=1)
              .rename(columns={map_col: 'region'}))
        dfs.append(df)
    df = pd.concat(dfs)

    # perform aggregations
    if agg == 'sum':
        df = df.groupby(LONG_IDX).sum().reset_index()

    # NOTE(review): `columns_orderd` was taken before `copy_col` was added,
    # so this reindex appears to drop the `copy_col` column again - confirm
    # whether that is intended
    ret.data = (df.reindex(columns=columns_orderd)
                .sort_values(SORT_IDX)
                .reset_index(drop=True))
    if not inplace:
        return ret
def map_regions(self, map_col, agg=None, copy_col=None, fname=None,
                region_col=None, remove_duplicates=False, inplace=False):
    """Map the regions of the data to another regional resolution

    The mapping is read from a region mapping file. For every model, the
    `region` column is replaced by the matching entries of `map_col`.

    Parameters
    ----------
    map_col: string
        The column used to map new regions to. Common examples include
        iso and 5_region.
    agg: string, optional
        Perform a data aggregation. Options include: sum.
    copy_col: string, optional
        Copy the existing region data into a new column for later use.
    fname: string, optional
        Use a non-default region mapping file
    region_col: string, optional
        Use a non-default column name for regions to map from
        (default: '<model>.REGION').
    remove_duplicates: bool, optional, default: False
        If there are duplicates in the mapping from one regional level to
        another, then remove these duplicates by counting the most common
        mapped value.
        This option is most useful when mapping from high resolution
        (e.g., model regions) to low resolution (e.g., 5_region).
    inplace : bool, default False
        if True, do operation inplace and return None
    """
    models = self.meta.index.get_level_values('model').unique()
    fname = fname or run_control()['region_mapping']['default']
    mapping = read_pandas(fname).rename(str.lower, axis='columns')
    map_col = map_col.lower()

    ret = copy.deepcopy(self) if not inplace else self
    _df = ret.data
    columns_orderd = _df.columns

    # merge data
    dfs = []
    for model in models:
        df = _df[_df['model'] == model]
        _col = region_col or '{}.REGION'.format(model)
        _map = mapping.rename(columns={_col.lower(): 'region'})
        _map = _map[['region', map_col]].dropna().drop_duplicates()
        # only keep mapping entries for regions that occur in the data
        _map = _map[_map['region'].isin(_df['region'])]
        if remove_duplicates and _map['region'].duplicated().any():
            # find duplicates
            where_dup = _map['region'].duplicated(keep=False)
            dups = _map[where_dup]
            logger().warning("""
            Duplicate entries found for the following regions.
            Mapping will occur only for the most common instance.
            {}""".format(dups['region'].unique()))
            # get non duplicates
            _map = _map[~where_dup]
            # order duplicates by the count frequency
            dups = (dups
                    .groupby(['region', map_col])
                    .size()
                    .reset_index(name='count')
                    .sort_values(by='count', ascending=False)
                    .drop('count', axis=1))
            # take top occurrence
            dups = dups[~dups['region'].duplicated(keep='first')]
            # combine them back
            _map = pd.concat([_map, dups])

        if copy_col is not None:
            # `df` is a filtered selection; create a new frame instead of
            # assigning a column on the selection
            df = df.assign(**{copy_col: df['region']})

        df = (df
              .merge(_map, on='region')
              .drop('region', axis=1)
              .rename(columns={map_col: 'region'})
              )
        dfs.append(df)
    df = pd.concat(dfs)

    # perform aggregations
    if agg == 'sum':
        df = df.groupby(LONG_IDX).sum().reset_index()

    # NOTE(review): `columns_orderd` was taken before `copy_col` was added,
    # so this reindex appears to drop the `copy_col` column again - confirm
    # whether that is intended
    ret.data = (df
                .reindex(columns=columns_orderd)
                .sort_values(SORT_IDX)
                .reset_index(drop=True)
                )
    if not inplace:
        return ret
def line_plot(df, x='year', y='value', ax=None, legend=None, title=True,
              color=None, marker=None, linestyle=None, cmap=None,
              rm_legend_label=None, **kwargs):
    """Plot data as lines with or without markers.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    ax : matplotlib.Axes, optional
    legend : bool or dictionary, optional
        Add a legend. If a dictionary is provided, it will be used as keyword
        arguments in creating the legend.
        default: None (displays legend only if less than 13 entries)
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
        default: None
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
        default: None
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
        default: None
    cmap : string, optional
        A colormap to use.
        default: None
    rm_legend_label : string, list, optional
        Remove the color, marker, or linestyle label in the legend.
        default: None (no label is removed)
    kwargs : Additional arguments to pass to the pd.DataFrame.plot() function

    Returns
    -------
    ax, handles, labels
        the axes plus the unique legend handles and labels
    """
    if ax is None:
        fig, ax = plt.subplots()

    df = reshape_line_plot(df, x, y)  # long form to one column per line

    # determine color, marker, and linestyle for each line
    defaults = default_props(reset=True, num_colors=len(df.columns),
                             colormap=cmap)
    props = {}
    prop_idx = {}
    rc = run_control()
    for kind, var in [('color', color), ('marker', marker),
                      ('linestyle', linestyle)]:
        rc_has_kind = kind in rc
        if var in df.columns.names:
            rc_has_var = rc_has_kind and var in rc[kind]
            props_for_kind = {}
            for val in df.columns.get_level_values(var).unique():
                if rc_has_var and val in rc[kind][var]:
                    props_for_kind[val] = rc[kind][var][val]
                    # cycle any way to keep defaults the same
                    next(defaults[kind])
                else:
                    props_for_kind[val] = next(defaults[kind])
            props[kind] = props_for_kind
            prop_idx[kind] = df.columns.names.index(var)

    # plot data, keeping track of which legend labels to apply
    if rm_legend_label is None:
        no_label = []
    elif isstr(rm_legend_label):
        no_label = [rm_legend_label]
    else:
        no_label = rm_legend_label

    # `items` is the spelling available in all pandas versions
    for col, data in df.items():
        pargs = {}
        labels = []
        # build plotting args and line legend labels
        for key, kind, var in [('c', 'color', color),
                               ('marker', 'marker', marker),
                               ('linestyle', 'linestyle', linestyle)]:
            if kind in props:
                label = col[prop_idx[kind]]
                pargs[key] = props[kind][label]
                if kind not in no_label:
                    # plain text of the value; stripping characters from
                    # its repr would also eat leading "u" characters
                    labels.append(str(label))
            else:
                pargs[key] = var
        kwargs.update(pargs)
        data = data.dropna()
        data.plot(ax=ax, **kwargs)
        if labels:
            ax.lines[-1].set_label(' '.join(labels))

    # build unique legend handles and labels
    handles, labels = ax.get_legend_handles_labels()
    handles, labels = np.array(handles), np.array(labels)
    _, idx = np.unique(labels, return_index=True)
    handles, labels = handles[idx], labels[idx]
    if legend is not False:
        _add_legend(ax, handles, labels, legend)

    # add default labels if possible
    ax.set_xlabel(x.title())
    units = df.columns.get_level_values('unit').unique()
    units_for_ylabel = len(units) == 1 and x == 'year' and y == 'value'
    ylabel = units[0] if units_for_ylabel else y.title()
    ax.set_ylabel(ylabel)

    # build a default title if possible
    _title = []
    for var in ['model', 'scenario', 'region', 'variable']:
        if var in df.columns.names:
            values = df.columns.get_level_values(var).unique()
            if len(values) == 1:
                _title.append('{}: {}'.format(var, values[0]))
    if title and _title:
        # a custom title string takes precedence over the default one
        title = ' '.join(_title) if title is True else title
        ax.set_title(title)

    return ax, handles, labels
def stack_plot(df, x='year', y='value', stack='variable',
               ax=None, legend=True, title=True, cmap=None, total=None,
               **kwargs):
    """Plot data as a stack chart.

    Positive and negative contributions are stacked separately; timeseries
    that change sign get an additional interpolated point at the first
    zero-crossing.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    stack: string, optional
        The column to use for stack groupings
        default: variable
    ax : matplotlib.Axes, optional
    legend : bool, optional
        Include a legend
        default: True
    title : bool or string, optional
        Display a default or custom title.
    cmap : string, optional
        A colormap to use.
        default: None
    total : bool or dict, optional
        If True, plot a total line with default pyam settings. If a dict, then
        plot the total line using the dict key-value pairs as keyword arguments
        to ax.plot(). If None, do not plot the total line.
        default : None
    kwargs : Additional arguments to pass to `ax.fill_between()`

    Returns
    -------
    ax : matplotlib.Axes

    Raises
    ------
    ValueError
        if any SORT_IDX column other than `x` and `stack` has more than one
        unique value
    """
    for col in set(SORT_IDX) - set([x, stack]):
        if len(df[col].unique()) > 1:
            msg = 'Can not plot multiple {}s in stack_plot with x={}, stack={}'
            raise ValueError(msg.format(col, x, stack))

    if ax is None:
        fig, ax = plt.subplots()

    # long form to one column per bar group
    _df = reshape_bar_plot(df, x, y, stack)

    # Line below is for interpolation. On datetimes I think you'd downcast to
    # seconds first and then cast back to datetime at the end..?
    _df.index = _df.index.astype(float)

    time_original = _df.index.values
    # one-row frame collecting, per column, the time of its first zero-crossing
    first_zero_times = pd.DataFrame(index=["first_zero_time"])

    # boolean per column: does the timeseries have both signs?
    both_positive_and_negative = _df.apply(
        lambda x: (x >= 0).any() and (x < 0).any()
    )
    for col in _df.loc[:, both_positive_and_negative]:
        # NOTE(review): `values` has NaNs dropped while `time_original` has
        # not, so positions may not line up for columns with gaps - confirm
        values = _df[col].dropna().values
        positive = (values >= 0)
        negative = (values < 0)
        # sign changes between consecutive entries
        pos_to_neg = positive[:-1] & negative[1:]
        neg_to_pos = positive[1:] & negative[:-1]
        crosses = np.argwhere(pos_to_neg | neg_to_pos)
        for i, cross in enumerate(crosses):
            cross = cross[0]  # get location
            x_1 = time_original[cross]
            x_2 = time_original[cross + 1]
            y_1 = values[cross]
            y_2 = values[cross + 1]
            # linear interpolation for the time at which the value is zero
            zero_time = x_1 - y_1 * (x_2 - x_1) / (y_2 - y_1)
            if i == 0:
                first_zero_times.loc[:, col] = zero_time
            # add an empty row at the crossing, filled by interpolation below
            if zero_time not in _df.index.values:
                _df.loc[zero_time, :] = np.nan

    first_zero_times = first_zero_times.sort_values(
        by="first_zero_time",
        axis=1,
    )
    _df = _df.reindex(sorted(_df.index)).interpolate(method="values")

    # Sort lines so that negative timeseries are on the right, positive
    # timeseries are on the left and timeseries which go from positive to
    # negative are ordered such that the timeseries which goes negative first
    # is on the right (case of timeseries which go from negative to positive
    # is an edge case we haven't thought about as it's unlikely to apply to
    # us).
    pos_cols = [c for c in _df if (_df[c] >= 0).all()]
    cross_cols = first_zero_times.columns[::-1].tolist()
    neg_cols = [c for c in _df if (_df[c] < 0).all()]
    col_order = pos_cols + cross_cols + neg_cols
    _df = _df[col_order]

    # explicitly get colors
    defaults = default_props(reset=True, num_colors=len(_df.columns),
                             colormap=cmap)['color']
    rc = run_control()
    colors = {}
    for key in _df.columns:
        # always draw from the defaults, run control overrides if configured
        c = next(defaults)
        c_in_rc = 'color' in rc
        if c_in_rc and stack in rc['color'] and key in rc['color'][stack]:
            c = rc['color'][stack][key]
        colors[key] = c

    # plot stacks, starting from the top and working our way down to the bottom
    # cumulative sum of the negative parts, from the first column onwards
    negative_only_cumulative = _df.applymap(
        lambda x: x if x < 0 else 0
    ).cumsum(axis=1)
    # cumulative sum of the positive parts, accumulated from the last column
    # backwards and then put back into `col_order`
    positive_only_cumulative = _df.applymap(lambda x: x if x >= 0 else 0)[
        col_order[::-1]
    ].cumsum(axis=1)[
        col_order
    ]
    time = _df.index.values
    upper = positive_only_cumulative.iloc[:, 0].values
    for j, col in enumerate(_df):
        noc_tr = negative_only_cumulative.iloc[:, j].values
        try:
            poc_nr = positive_only_cumulative.iloc[:, j + 1].values
        except IndexError:
            # last column: there is no positive stack below it
            poc_nr = np.zeros_like(upper)
        lower = poc_nr.copy()
        if (noc_tr < 0).any():
            # where no positive stack remains, continue with the negative one
            lower[np.where(poc_nr == 0)] = noc_tr[np.where(poc_nr == 0)]

        ax.fill_between(time, lower, upper, label=col,
                        color=colors[col], **kwargs)
        upper = lower.copy()

    # add total
    if (total is not None) and total:  # cover case where total=False
        if isinstance(total, bool):  # can now assume total=True
            total = {}
        total.setdefault("label", "Total")
        total.setdefault("color", "black")
        total.setdefault("lw", 4.0)
        ax.plot(time, _df.sum(axis=1), **total)

    # add legend
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    ax.set_xlabel(x.capitalize())
    units = df['unit'].unique()
    if len(units) == 1:
        ax.set_ylabel(units[0])

    # build a default title if possible
    _title = []
    for var in ['model', 'scenario', 'region', 'variable']:
        values = df[var].unique()
        if len(values) == 1:
            _title.append('{}: {}'.format(var, values[0]))
    if title and _title:
        # a custom title string takes precedence over the default one
        title = ' '.join(_title) if title is True else title
        ax.set_title(title)

    return ax
def bar(
    df,
    x="year",
    y="value",
    bars="variable",
    order=None,
    bars_order=None,
    orient="v",
    legend=True,
    title=True,
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot data as a stacked or grouped bar chart

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    bars : string, optional
        The column to use for bar groupings
    order, bars_order : list, optional
        The order to plot the levels on the x-axis and the bars (and legend).
        If not specified, order
        by :meth:`run_control()['order'][\<stack\>] <pyam.run_control>`
        (where available) or alphabetical.
    orient : string, optional
        Vertical or horizontal orientation; any value starting with "v"
        is treated as vertical.
    legend : bool, optional
        Include a legend.
    title : bool or string, optional
        Display a default or custom title.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        if any SORT_IDX column other than `x` and `bars` has more than one
        unique value
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    for col in set(SORT_IDX) - set([x, bars]):
        if len(df[col].unique()) > 1:
            msg = "Can not plot multiple {}s in bar plot with x={}, bars={}"
            raise ValueError(msg.format(col, x, bars))

    if ax is None:
        fig, ax = plt.subplots()

    # long form to one column per bar group
    _df = reshape_mpl(df, x, y, bars, **{x: order, bars: bars_order})

    # explicitly get colors
    defaults = default_props(reset=True, num_colors=len(_df.columns),
                             colormap=cmap)["color"]
    rc = run_control()
    color = []
    for key in _df.columns:
        # always draw from the defaults, run control overrides if configured
        c = next(defaults)
        if "color" in rc and bars in rc["color"] and key in rc["color"][bars]:
            c = rc["color"][bars][key]
        color.append(c)

    # change year to str to prevent pandas/matplotlib from auto-ordering (#474)
    if _df.index.name == "year":
        _df.index = [str(i) for i in _df.index]

    # decide the orientation once so that the plot kind and the axis labels
    # cannot disagree (e.g. for orient="vertical")
    vertical = orient.startswith("v")

    # plot data
    kind = "bar" if vertical else "barh"
    _df.plot(kind=kind, color=color, ax=ax, **kwargs)

    # add legend
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    if vertical:
        ax.set_xlabel(x.capitalize())
    else:
        ax.set_ylabel(x.capitalize())
    units = df["unit"].unique()
    if len(units) == 1 and y == "value":
        if vertical:
            ax.set_ylabel(units[0])
        else:
            ax.set_xlabel(units[0])

    # build a default title if possible
    _title = []
    for var in ["model", "scenario", "region", "variable"]:
        values = df[var].unique()
        if len(values) == 1:
            _title.append("{}: {}".format(var, values[0]))
    if title and _title:
        # a custom title string takes precedence over the default one
        title = " ".join(_title) if title is True else title
        ax.set_title(title)

    return ax
def stack(
    df,
    x="year",
    y="value",
    stack="variable",
    order=None,
    total=None,
    legend=True,
    title=True,
    ax=None,
    cmap=None,
    **kwargs,
):
    r"""Plot a stacked area chart of timeseries data

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    x : string, optional
        The column to use for x-axis values
    y : string, optional
        The column to use for y-axis values
    stack : string, optional
        The column to use for stack groupings
    order : list, optional
        The order to plot the stack levels and the legend. If not specified,
        order by :meth:`run_control()['order'][\<stack\>] <pyam.run_control>`
        (where available) or alphabetical.
    total : bool or dict, optional
        If True, plot a total line with default |pyam| settings. If a dict,
        then plot the total line using the dict key-value pairs as keyword
        arguments to :meth:`matplotlib.axes.Axes.plot`. If None, do not plot
        the total line.
    legend : bool, optional
        Include a legend.
    title : bool or string, optional
        Display a default or custom title.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to
        :meth:`matplotlib.axes.Axes.fill_between`

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        if any SORT_IDX column other than `x` and `stack` has more than one
        unique value, or if a timeseries has no value at the first or last
        point of the `x` range
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    for col in set(SORT_IDX) - set([x, stack]):
        if len(df[col].unique()) > 1:
            msg = "Can not plot multiple {}s in stack_plot with x={}, stack={}"
            raise ValueError(msg.format(col, x, stack))

    if ax is None:
        fig, ax = plt.subplots()

    # long form to one column per stack group
    _df = reshape_mpl(df, x, y, stack, **{stack: order})

    # cannot plot timeseries that do not extend for the entire range
    has_na = _df.iloc[[0, -1]].isna().any()
    if any(has_na):
        msg = "Can not plot data that does not extend for the entire {} range"
        raise ValueError(msg.format(x))

    def as_series(index, name):
        # zero-valued series at the first element of each entry of `index`
        _idx = [i[0] for i in index]
        return pd.Series([0] * len(index), index=_idx, name=name)

    # determine all time-indices where a timeseries crosses 0 and add to data
    _rows = pd.concat(
        [
            as_series(cross_threshold(_df[c], return_type=float), c)
            for c in _df.columns
        ],
        axis=1,
    )
    # `pd.concat` replaces `DataFrame.append`, removed in pandas 2.0
    new_rows = _rows.loc[_rows.index.difference(_df.index)]
    _df = pd.concat([_df, new_rows]).sort_index().interpolate(method="index")

    # explicitly get colors
    defaults = default_props(reset=True, num_colors=len(_df.columns),
                             colormap=cmap)["color"]
    rc = run_control()
    colors = {}
    for key in _df.columns:
        # always draw from the defaults, run control overrides if configured
        c = next(defaults)
        c_in_rc = "color" in rc
        if c_in_rc and stack in rc["color"] and key in rc["color"][stack]:
            c = rc["color"][stack][key]
        colors[key] = c

    # determine positive and negative parts of the timeseries data
    _df_pos = _df.clip(lower=0)
    _df_neg = _df.clip(upper=0)

    # stack the positive parts upwards, starting with the last column
    lower = [0] * len(_df_pos)
    for col in reversed(_df_pos.columns):
        upper = _df_pos[col].fillna(0) + lower
        ax.fill_between(
            _df_pos.index,
            upper,
            lower,
            label=None,
            color=colors[col],
            linewidth=0,
            **kwargs,
        )
        lower = upper

    # stack the negative parts downwards, starting with the first column
    upper = [0] * len(_df_neg)
    for col in _df_neg.columns:
        lower = _df_neg[col].fillna(0) + upper
        # add label only on negative to have it in right order
        ax.fill_between(
            _df_neg.index,
            upper,
            lower,
            label=col,
            color=colors[col],
            linewidth=0,
            **kwargs,
        )
        upper = lower

    # add total
    if (total is not None) and total:  # cover case where total=False
        if isinstance(total, bool):  # can now assume total=True
            total = {}
        total.setdefault("label", "Total")
        total.setdefault("color", "black")
        total.setdefault("lw", 4.0)
        ax.plot(_df.index, _df.sum(axis=1), **total)

    # add legend
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    ax.set_xlabel(x.capitalize())
    units = df["unit"].unique()
    if len(units) == 1:
        ax.set_ylabel(units[0])

    # build a default title if possible
    _title = []
    for var in ["model", "scenario", "region", "variable"]:
        values = df[var].unique()
        if len(values) == 1:
            _title.append("{}: {}".format(var, values[0]))
    if title and _title:
        # a custom title string takes precedence over the default one
        title = " ".join(_title) if title is True else title
        ax.set_title(title)

    return ax
def pie(
    df,
    value="value",
    category="variable",
    legend=False,
    title=True,
    ax=None,
    cmap=None,
    **kwargs,
):
    """Plot data as a pie chart.

    Values are summed per category; slices whose sum is not positive are
    drawn exploded, using the absolute value as size.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    value : string, optional
        The column to use for data values
    category : string, optional
        The column to use for labels
    legend : bool, optional
        Include a legend.
    title : bool or string, optional
        Accepted for consistency with the other plots; not used here.
    ax : :class:`matplotlib.axes.Axes`, optional
    cmap : string, optional
        The name of a registered colormap.
    kwargs
        Additional arguments passed to :meth:`pandas.DataFrame.plot`.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance

    Raises
    ------
    ValueError
        if any SORT_IDX column other than `category` has more than one
        unique value
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    # all other sorting dimensions must be unique
    for col in set(SORT_IDX) - set([category]):
        if len(df[col].unique()) <= 1:
            continue
        msg = ("Can not plot multiple {}s in a pie plot with value={},"
               + " category={}")
        raise ValueError(msg.format(col, value, category))

    if ax is None:
        fig, ax = plt.subplots()

    # one slice per category; offset the slices that are not positive
    _df = df.groupby(category)[value].sum()
    explode = tuple(0 if is_positive else 0.2 for is_positive in _df > 0)
    _df = _df.abs()

    # colors: run control where configured, default cycle otherwise
    palette = default_props(reset=True, num_colors=len(_df.index),
                            colormap=cmap)["color"]
    rc = run_control()
    color = []
    for key, default_color in zip(_df.index, palette):
        has_override = (
            "color" in rc
            and category in rc["color"]
            and key in rc["color"][category]
        )
        color.append(rc["color"][category][key] if has_override
                     else default_color)

    # plot data
    _df.plot(kind="pie", colors=color, ax=ax, explode=explode, **kwargs)

    # the legend is always created and removed again if not requested
    ax.legend(loc="center left", bbox_to_anchor=(1.0, 0.5), labels=_df.index)
    if not legend:
        ax.legend_.remove()

    # remove label
    ax.set_ylabel("")

    return ax
def line_plot(df, x='year', y='value', ax=None, legend=None, title=True,
              color=None, marker=None, linestyle=None, cmap=None, **kwargs):
    """Plot data as lines with or without markers.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    ax : matplotlib.Axes, optional
    legend : bool, optional
        Include a legend (`None` displays legend only if less than 13 entries)
        default: None
    title : bool or string, optional
        Display a default or custom title.
    color : string, optional
        A valid matplotlib color or column name. If a column name, common
        values will be provided the same color.
        default: None
    marker : string, optional
        A valid matplotlib marker or column name. If a column name, common
        values will be provided the same marker.
        default: None
    linestyle : string, optional
        A valid matplotlib linestyle or column name. If a column name, common
        values will be provided the same linestyle.
        default: None
    cmap : string, optional
        A colormap to use.
        default: None
    kwargs : Additional arguments to pass to the pd.DataFrame.plot() function

    Returns
    -------
    ax, handles, labels
        the axes plus the legend handles and labels
    """
    if ax is None:
        fig, ax = plt.subplots()

    df = reshape_line_plot(df, x, y)  # long form to one column per line

    # determine color, marker, and linestyle for each line
    defaults = default_props(reset=True, num_colors=len(df.columns),
                             colormap=cmap)
    props = {}
    prop_idx = {}
    rc = run_control()
    for kind, var in [('color', color), ('marker', marker),
                      ('linestyle', linestyle)]:
        rc_has_kind = kind in rc
        if var in df.columns.names:
            rc_has_var = rc_has_kind and var in rc[kind]
            props_for_kind = {}
            for val in df.columns.get_level_values(var).unique():
                if rc_has_var and val in rc[kind][var]:
                    props_for_kind[val] = rc[kind][var][val]
                    # cycle any way to keep defaults the same
                    next(defaults[kind])
                else:
                    props_for_kind[val] = next(defaults[kind])
            props[kind] = props_for_kind
            prop_idx[kind] = df.columns.names.index(var)

    # plot data
    legend_data = []
    # `items` is the spelling available in all pandas versions
    for col, data in df.items():
        pargs = {}
        labels = []
        for key, kind, var in [('c', 'color', color),
                               ('marker', 'marker', marker),
                               ('linestyle', 'linestyle', linestyle)]:
            if kind in props:
                label = col[prop_idx[kind]]
                pargs[key] = props[kind][label]
                # plain text of the value; stripping characters from its
                # repr would also eat leading "u" characters
                labels.append(str(label))
            else:
                pargs[key] = var
        legend_data.append(' '.join(labels))
        kwargs.update(pargs)
        data.plot(ax=ax, **kwargs)

    # build legend handles and labels
    handles, labels = ax.get_legend_handles_labels()
    if legend_data != [''] * len(legend_data):
        # one entry per unique label, using the first line carrying it
        labels = sorted(set(legend_data))
        idxs = [legend_data.index(d) for d in labels]
        handles = [handles[i] for i in idxs]
    if legend is None and len(labels) < 13 or legend is True:
        ax.legend(handles, labels)

    # add default labels if possible
    ax.set_xlabel(x.title())
    units = df.columns.get_level_values('unit').unique()
    units_for_ylabel = len(units) == 1 and x == 'year' and y == 'value'
    ylabel = units[0] if units_for_ylabel else y.title()
    ax.set_ylabel(ylabel)

    # build a default title if possible
    _title = []
    for var in ['model', 'scenario', 'region', 'variable']:
        if var in df.columns.names:
            values = df.columns.get_level_values(var).unique()
            if len(values) == 1:
                _title.append('{}: {}'.format(var, values[0]))
    if title and _title:
        # a custom title string takes precedence over the default one
        title = ' '.join(_title) if title is True else title
        ax.set_title(title)

    return ax, handles, labels
def pie_plot(df, value='value', category='variable',
             ax=None, legend=False, title=True, cmap=None,
             **kwargs):
    """Plot data as a pie chart.

    Values are summed per `category`; wedges whose sum is not strictly
    positive are drawn "exploded" (offset from the centre) and sized by
    their absolute value.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    value : string, optional
        The column to use for data values
        default: value
    category : string, optional
        The column to use for labels
        default: variable
    ax : matplotlib.Axes, optional
        Axes to draw on; a new figure is created if not given.
    legend : bool, optional
        Include a legend
        default: False
    title : bool or string, optional
        A string is displayed as the plot title. No default title is
        generated, so boolean values leave the axes untitled.
    cmap : string, optional
        A colormap to use.
        default: None
    kwargs : Additional arguments to pass to the pandas plot() call

    Returns
    -------
    ax : matplotlib.Axes

    Raises
    ------
    ValueError
        If any column of SORT_IDX other than `category` holds more than
        one unique value.
    """
    for col in set(SORT_IDX) - {category}:
        if len(df[col].unique()) > 1:
            msg = ('Can not plot multiple {}s in pie_plot with value={},'
                   ' category={}')
            raise ValueError(msg.format(col, value, category))

    if ax is None:
        fig, ax = plt.subplots()

    # get data, set negative values to explode
    _df = df.groupby(category)[value].sum()
    is_positive = _df > 0
    explode = tuple(0 if positive else 0.2 for positive in is_positive)
    _df = _df.abs()

    # explicitly get colors; run-control colors override the defaults
    defaults = default_props(reset=True, num_colors=len(_df.index),
                             colormap=cmap)['color']
    rc = run_control()
    use_rc = 'color' in rc and category in rc['color']
    color = []
    for key, c in zip(_df.index, defaults):
        if use_rc and key in rc['color'][category]:
            c = rc['color'][category][key]
        color.append(c)

    # plot data
    _df.plot(kind='pie', colors=color, ax=ax, explode=explode, **kwargs)

    # add legend (always built so that it can be removed consistently)
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5), labels=_df.index)
    if not legend:
        ax.legend_.remove()

    # remove label
    ax.set_ylabel('')

    # honor a custom title; `title=True` has no default to fall back on
    if isinstance(title, str):
        ax.set_title(title)

    return ax
def box(df, y="value", x="year", by=None, legend=True, title=None, ax=None,
        **kwargs):
    """Plot boxplot of data using seaborn.boxplot

    A :class:`pandas.DataFrame` passed as `df` is not modified; recoding
    and sorting of the `by` column are applied to a copy.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`, :class:`pandas.DataFrame`
        Data to be plotted
    y : string, optional
        The column to use for y-axis values representing the distribution
        within the boxplot
    x : string, optional
        The column to use for x-axis points, i.e. the number of boxes the
        plot will have
    by : string, optional
        The column for grouping y-axis values at each x-axis point,
        i.e. a 3rd dimension. Data should be categorical, not a continuous
        variable.
    legend : bool, optional
        Include a legend.
    title : string, optional
        Title to display; any truthy value is passed to
        :meth:`matplotlib.axes.Axes.set_title` as-is (no default title
        is built).
    ax : :class:`matplotlib.axes.Axes`, optional
    kwargs
        Additional arguments passed to :func:`seaborn.boxplot`.

    Returns
    -------
    ax : :class:`matplotlib.axes.Axes`
        Modified `ax` or new instance
    """
    # cast to DataFrame if necessary
    # TODO: select only relevant meta columns
    if not isinstance(df, pd.DataFrame):
        df = df.as_pandas()

    if by:
        rc = run_control()
        if "palette" not in kwargs and "color" in rc and by in rc["color"]:
            # TODO this only works if all categories are defined in run_control
            palette = rc["color"][by]
            # `set_categories(..., inplace=True)` was removed in pandas 2.0,
            # so assign the recoded column; work on a copy to leave the
            # caller's frame untouched
            df = df.copy()
            df[by] = (
                df[by].astype("category").cat.set_categories(list(palette))
            )
            kwargs["palette"] = palette
        else:
            # returns a new, sorted frame instead of sorting in place
            df = df.sort_values(by)

    if ax is None:
        fig, ax = plt.subplots()

    # Create the plot
    sns.boxplot(x=x, y=y, hue=by, data=df, ax=ax, **kwargs)

    # Add legend, titled with the number of distinct META_IDX combinations
    if legend:
        ax.legend(loc=2)
        n_items = len(df[META_IDX].drop_duplicates())
        ax.legend_.set_title("n=" + str(n_items))

    # Axes labels
    if y == "value":
        ax.set_ylabel(df.unit.unique()[0])
    else:
        ax.set_ylabel(y)

    if title:
        ax.set_title(title)

    return ax
def bar_plot(df, x='year', y='value', bars='variable',
             ax=None, orient='v', legend=True, title=True, cmap=None,
             **kwargs):
    """Plot data as a bar chart.

    Parameters
    ----------
    df : pd.DataFrame
        Data to plot as a long-form data frame
    x : string, optional
        The column to use for x-axis values
        default: year
    y : string, optional
        The column to use for y-axis values
        default: value
    bars: string, optional
        The column to use for bar groupings
        default: variable
    ax : matplotlib.Axes, optional
        Axes to draw on; a new figure is created if not given.
    orient : string, optional
        Vertical or horizontal orientation; any value starting with 'v'
        is vertical, everything else is horizontal.
        default: v
    legend : bool, optional
        Include a legend
        default: True
    title : bool or string, optional
        Display a default or custom title. With `True`, a title is built
        from the model/scenario/region/variable columns that hold a
        single unique value.
    cmap : string, optional
        A colormap to use.
        default: None
    kwargs : Additional arguments to pass to the pd.DataFrame.plot() function

    Returns
    -------
    ax : matplotlib.Axes

    Raises
    ------
    ValueError
        If any column of SORT_IDX other than `x` and `bars` holds more
        than one unique value.
    """
    for col in set(SORT_IDX) - {x, bars}:
        if len(df[col].unique()) > 1:
            msg = 'Can not plot multiple {}s in bar_plot with x={}, bars={}'
            raise ValueError(msg.format(col, x, bars))

    if ax is None:
        fig, ax = plt.subplots()

    # long form to one column per bar group
    _df = reshape_bar_plot(df, x, y, bars)

    # explicitly get colors; run-control colors override the defaults
    defaults = default_props(reset=True, num_colors=len(_df.columns),
                             colormap=cmap)['color']
    rc = run_control()
    use_rc = 'color' in rc and bars in rc['color']
    color = []
    for key in _df.columns:
        # always advance the default cycle to keep defaults the same
        c = next(defaults)
        if use_rc and key in rc['color'][bars]:
            c = rc['color'][bars][key]
        color.append(c)

    # plot data; decide the orientation once so that bars and axis labels
    # always agree (e.g. for orient='vertical')
    vertical = orient.startswith('v')
    kind = 'bar' if vertical else 'barh'
    _df.plot(kind=kind, color=color, ax=ax, **kwargs)

    # add legend
    ax.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
    if not legend:
        ax.legend_.remove()

    # add default labels if possible
    if vertical:
        ax.set_xlabel(x.capitalize())
    else:
        ax.set_ylabel(x.capitalize())
    units = df['unit'].unique()
    if len(units) == 1 and y == 'value':
        if vertical:
            ax.set_ylabel(units[0])
        else:
            ax.set_xlabel(units[0])

    if title is True:
        # build a default title if possible
        _title = []
        for var in ['model', 'scenario', 'region', 'variable']:
            values = df[var].unique()
            if len(values) == 1:
                _title.append('{}: {}'.format(var, values[0]))
        if _title:
            ax.set_title(' '.join(_title))
    elif title:
        # a custom title does not depend on default title parts existing
        ax.set_title(title)

    return ax
def boxplot(df, y='value', x='year', by=None,
            ax=None, legend=True, title=None, **kwargs):
    """Plot boxplot of data using seaborn.boxplot

    The data frame passed as `df` is not modified; recoding and sorting
    of the `by` column are applied to a copy.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot as a long-form data frame
    y : string, optional
        The column to use for y-axis values representing the distribution
        within the boxplot
    x : string, optional
        The column to use for x-axis points, i.e. the number of boxes the
        plot will have
    by : string, optional
        The column for grouping y-axis values at each x-axis point,
        i.e. a 3rd dimension. Data should be categorical, not a continuous
        variable
    ax : matplotlib.Axes, optional
        Axes to draw on; a new figure is created if not given.
    legend : bool, optional
        Include a legend
    title : string, optional
        Title to display; any truthy value is passed to `ax.set_title()`
        as-is (no default title is built)
    kwargs : Additional arguments to pass to seaborn.boxplot()

    Returns
    -------
    ax : matplotlib.Axes
    """
    if by:
        rc = run_control()
        if 'palette' not in kwargs and 'color' in rc and by in rc['color']:
            # TODO this only works if all categories are defined in run_control
            palette = rc['color'][by]
            # `set_categories(..., inplace=True)` was removed in pandas 2.0,
            # so assign the recoded column; work on a copy to leave the
            # caller's frame untouched
            df = df.copy()
            df[by] = (
                df[by].astype('category').cat.set_categories(list(palette))
            )
            kwargs['palette'] = palette
        else:
            # returns a new, sorted frame instead of sorting in place
            df = df.sort_values(by)

    if ax is None:
        fig, ax = plt.subplots()

    # plot
    sns.boxplot(x=x, y=y, hue=by, data=df, ax=ax, **kwargs)

    # Add legend, titled with the number of distinct META_IDX combinations
    if legend:
        ax.legend(loc=2)
        n_items = len(df[META_IDX].drop_duplicates())
        ax.legend_.set_title('n=' + str(n_items))

    # Axes labels
    if y == 'value':
        ax.set_ylabel(df.unit.unique()[0])
    else:
        ax.set_ylabel(y)

    if title:
        ax.set_title(title)

    return ax