def convert_unit(df, current, to, factor=None, registry=None, context=None, inplace=False):
    """Internal implementation of unit conversion with explicit kwargs

    Parameters
    ----------
    df : IamDataFrame
        Object whose timeseries data is to be converted.
    current : str
        Current unit to be converted from.
    to : str
        Unit to be converted to.
    factor : float, optional
        Explicit conversion factor; if given, pint is bypassed entirely.
    registry : pint.UnitRegistry, optional
        Registry for the conversion; defaults to the registry from `iam_units`.
    context : str or pint.Context, optional
        Context passed to pint (e.g. a GWP metric for emissions conversion).
    inplace : bool, optional
        If True, modify `df` directly and return None.

    Returns
    -------
    IamDataFrame or None
        A copy with converted units, or None if `inplace` is True.

    Raises
    ------
    TypeError
        If `registry` is not a `pint.UnitRegistry`.
    """
    ret = df.copy() if not inplace else df

    # Mask for rows having *current* units to be converted, args for replace
    try:
        where = ret._data.index.get_loc_level(current, "unit")[0]
    except KeyError:
        # no row has the *current* unit; use an all-False mask of matching length
        where = [False] * len(ret)
    index_args = [ret._data, "unit", {current: to}]

    # Compare against None (not truthiness) so an explicit `factor=0` is honored
    # instead of silently falling through to the pint-based conversion
    if factor is not None:
        # Short code path: use an explicit conversion factor, don't use pint
        ret._data[where] *= factor
        ret._data.index = replace_index_values(*index_args)
        return None if inplace else ret

    # Convert using a pint.UnitRegistry; default the one from iam_units
    registry = registry or iam_units.registry

    # Make versions without -equiv
    _current, _to = [i.replace("-equiv", "") for i in [current, to]]
    # Pair of (magnitude, unit)
    qty = [ret.data.loc[where, "value"].values, _current]

    try:
        # Create a vector pint.Quantity
        qty = registry.Quantity(*qty)
    except pint.UndefinedUnitError:
        # *qty* might include a GHG species; try GWP conversion
        result, _to = convert_gwp(context, qty, _to)
    except AttributeError:
        # .Quantity() did not exist
        raise TypeError(f"{registry} must be `pint.UnitRegistry`") from None
    else:
        # Ordinary conversion, using an empty Context if none was provided
        result = qty.to(_to, context or pint.Context())

    # Copy values from the result Quantity and assign units
    ret._data[where] = result.magnitude
    ret._data.index = replace_index_values(*index_args)

    return None if inplace else ret
def test_replace_index_level(test_pd_df, test_df_index, exp_scen, mapping, rows):
    """Assert that replace_index_value works as expected"""
    # expected scenario column: with a row-filter, only selected rows are renamed
    if rows is None:
        expected_scenarios = exp_scen
    else:
        expected_scenarios = ["scen_a"] + exp_scen[1:]
    test_pd_df["scenario"] = expected_scenarios
    expected = test_pd_df.set_index(IAMC_IDX)

    test_df_index.index = replace_index_values(test_df_index, "scenario", mapping, rows)
    pdt.assert_frame_equal(expected, test_df_index)
def test_replace_index_level(test_pd_df, test_df_index, exp_scen, mapping):
    """Assert that replace_index_value works as expected"""
    # build the expected frame with the renamed scenario column
    expected = test_pd_df.copy()
    expected["scenario"] = exp_scen
    expected = expected.set_index(IAMC_IDX)

    # apply the replacement on a copy and compare
    observed = test_df_index.copy()
    observed.index = replace_index_values(observed, "scenario", mapping)
    pdt.assert_frame_equal(expected, observed)
def test_replace_index_level(test_pd_df, test_df_index):
    """Assert that replace_index_value works as expected"""
    # expected frame after renaming 'scen_a' to 'scen_c'
    expected = test_pd_df.copy()
    expected['scenario'] = ['scen_c', 'scen_c', 'scen_b']
    expected = expected.set_index(IAMC_IDX)

    # apply the replacement on a copy and compare
    observed = test_df_index.copy()
    observed.index = replace_index_values(observed, 'scenario', {'scen_a': 'scen_c'})
    pdt.assert_frame_equal(expected, observed)
def _aggregate(df, variable, components=None, method=np.sum):
    """Internal implementation of the `aggregate` function"""
    if components is not None:
        # `components` must be a proper list-like, but not a dictionary
        if not islistable(components) or isinstance(components, dict):
            raise ValueError(
                f"Value for `components` must be a list, found: {components}"
            )
        # a manual component list is only supported for a single variable
        if islistable(variable):
            raise NotImplementedError(
                "Aggregating by list of variables does not support `components`."
            )

    mapping = {}
    msg = "Cannot aggregate variable '{}' because it has no components!"

    if isstr(variable):
        # single variable: default components to all variables one level below
        _components = components or df._variable_components(variable)
        if not len(_components):
            logger.info(msg.format(variable))
            return
        mapping.update({c: variable for c in _components})
    else:
        # several variables: components are all variables one level below each
        variables = variable if islistable(variable) else [variable]
        for v in variables:
            _components = df._variable_components(v)
            if not len(_components):
                logger.info(msg.format(v))
                continue
            mapping.update({c: v for c in _components})

    # rename all components to their target variable and aggregate
    _df = df._data[df._apply_filters(variable=mapping.keys())]
    _df.index = replace_index_values(_df, "variable", mapping)
    return _group_and_agg(_df, [], method)
def _aggregate_region(
    df,
    variable,
    region,
    subregions=None,
    components=False,
    method="sum",
    weight=None,
    drop_negative_weights=True,
):
    """Internal implementation for aggregating data over subregions

    Parameters
    ----------
    df : IamDataFrame
        Object whose timeseries data is aggregated.
    variable : str or list of str
        Variable(s) to be aggregated over the subregions.
    region : str
        Name of the target (aggregate) region.
    subregions : list of str, optional
        Subregions to aggregate over; defaults to all regions other than `region`.
    components : bool or list of str, optional
        If not False, add components already defined at the `region` level;
        if True, auto-detect them.
    method : str or callable, optional
        Aggregation method passed on to the grouping helper.
    weight : str, optional
        Variable to use as weight for a weighted aggregation.
    drop_negative_weights : bool, optional
        Whether negative weights are dropped (only relevant with `weight`).

    Returns
    -------
    pd.Series or None
        Aggregated data, or None if `variable` exists in no subregion.

    Raises
    ------
    ValueError
        On unsupported argument combinations.
    """
    # components and weights are each only supported for a single variable
    if not isstr(variable) and components is not False:
        raise ValueError(
            "Aggregating by list of variables with components is not supported!"
        )

    if weight is not None and components is not False:
        raise ValueError("Using weights and components in one operation not supported!")

    # default subregions to all regions other than `region`
    subregions = subregions or df._all_other_regions(region, variable)

    if not len(subregions):
        logger.info(
            f"Cannot aggregate variable '{variable}' to '{region}' "
            "because it does not exist in any subregion!"
        )
        return

    # compute aggregate over all subregions
    subregion_df = df.filter(region=subregions)
    rows = subregion_df._apply_filters(variable=variable)
    if weight is None:
        # disabling the negative-weight dropping only makes sense with a weight
        if drop_negative_weights is False:
            raise ValueError(
                "Dropping negative weights can only be used with `weights`!"
            )
        # unweighted aggregation over the 'region' index level
        _data = _group_and_agg(subregion_df._data[rows], "region", method=method)
    else:
        # weighted aggregation: select weight rows from the same subregion data
        weight_rows = subregion_df._apply_filters(variable=weight)
        _data = _agg_weight(
            subregion_df._data[rows],
            subregion_df._data[weight_rows],
            method,
            drop_negative_weights,
        )

    # if not `components=False`, add components at the `region` level
    if components:
        with adjust_log_level(logger):
            region_df = df.filter(region=region)

            # if `True`, auto-detect `components` at the `region` level,
            # defaults to variables below `variable` only present in `region`
            if components is True:
                level = dict(level=None)
                r_comps = region_df._variable_components(variable, **level)
                sr_comps = subregion_df._variable_components(variable, **level)
                # keep only components that do not exist in any subregion
                components = set(r_comps).difference(sr_comps)

            if len(components):
                # rename all components to `variable` and aggregate
                rows = region_df._apply_filters(variable=components)
                _df = region_df._data[rows]
                mapping = {c: variable for c in components}
                _df.index = replace_index_values(_df.index, "variable", mapping)
                # add component values on top of the subregion aggregate
                _data = _data.add(_group_and_agg(_df, "region"), fill_value=0)

    return _data
def test_replace_index_level_raises(test_df_index):
    """Assert that replace_index_value raises with non-existing level"""
    mapping = {"scen_a": "scen_c"}
    with pytest.raises(KeyError):
        # 'foo' is not a level of the index
        replace_index_values(test_df_index, "foo", mapping)
def _op_data(df, name, method, axis, fillna=None, args=(), ignore_units=False, **kwds):
    """Internal implementation of numerical operations on timeseries

    Parameters
    ----------
    df : IamDataFrame
        Object whose timeseries data is used as operands.
    name : str
        Name of the resulting item, appended to the index on `axis`.
    method : str or callable
        A key of KNOWN_OPS or a callable applied to the resolved arguments.
    axis : str
        Index level on which the operation is performed.
    fillna : scalar or dict, optional
        Value(s) to fill missing rows after an outer merge of the operands.
    args : tuple, optional
        Positional arguments; entries matching values on `axis` are replaced
        by the corresponding `df._data` slices.
    ignore_units : bool or str, optional
        If not False, bypass pint and assign this value (or "unknown") as unit.
    kwds :
        Keyword arguments, resolved the same way as `args`.

    Returns
    -------
    pd.Series
        Resulting data with `name` appended to the index on `axis`.

    Raises
    ------
    ValueError
        If `axis` or `method` is unknown, or the result is not a pd.Series.
    """
    if axis not in df._data.index.names:
        raise ValueError(f"Unknown axis: {axis}")
    if method in KNOWN_OPS:
        method = KNOWN_OPS[method]
    elif callable(method):
        # custom callables are applied as-is
        pass
    else:
        raise ValueError(f"Unknown method: {method}")

    # index levels other than the operation axis
    cols = df._data.index.names.difference([axis])

    # replace args and kwds with values of `df._data` if applicable
    # _data_args and _data_kwds track if an argument was replaced by `df._data` values
    n = len(args)
    _args, _data_args, _units_args = [None] * n, [False] * n, [None] * n
    for i, value in enumerate(args):
        _args[i], _units_args[i], _data_args[i] = _get_values(
            df, axis, value, cols, f"_arg{i}")

    _data_kwds, _unit_kwds = {}, {}
    for i, (key, value) in enumerate(kwds.items()):
        kwds[key], _unit_kwds[key], _data_kwds[key] = _get_values(
            df, axis, value, cols, key)

    # fast-pass on units: override pint for some methods if all kwds have the same unit
    # NOTE(review): this branch assumes kwds contains keys "a" and "b" for these
    # methods — presumably guaranteed by the caller; verify against call sites
    if (method in [add, subtract, divide] and ignore_units is False and fillna is None
            and len(_unit_kwds["a"]) == 1 and len(_unit_kwds["b"]) == 1
            and registry.Unit(_unit_kwds["a"][0]) == registry.Unit(_unit_kwds["b"][0])):
        # activate ignore-units feature; keep the unit for add/subtract,
        # use dimensionless ("") for divide
        ignore_units = _unit_kwds["a"][0] if method in [add, subtract] else ""
        # downcast `pint.Quantity` to numerical value
        kwds["a"], kwds["b"] = _to_value(kwds["a"]), _to_value(kwds["b"])

    # cast args and kwds to pd.Series of pint.Quantity
    if ignore_units is False:
        for i, is_data in enumerate(_data_args):
            _args[i] = _to_quantity(_args[i]) if is_data else _args[i]
        for key, value in kwds.items():
            kwds[key] = _to_quantity(value) if _data_kwds[key] else value
    # else remove units from pd.Series
    else:
        for i, is_data in enumerate(_data_args):
            _args[i] = _args[i].reset_index("unit", drop=True) if is_data else _args[i]
        for key, value in kwds.items():
            kwds[key] = (value.reset_index("unit", drop=True)
                         if _data_kwds[key] else value)

    # merge all args and kwds that are based on `df._data` to apply fillna
    # NOTE(review): pd.merge takes exactly two frames, so this presumably
    # assumes exactly two data-based operands — confirm against callers
    if fillna:
        _data_cols = [_args[i] for i, is_data in enumerate(_data_args) if is_data]
        _data_cols += [kwds[key] for key, is_data in _data_kwds.items() if is_data]
        _data = pd.merge(*_data_cols, how="outer", left_index=True, right_index=True)
        _data.fillna(fillna, inplace=True)

        # write the merged-and-filled columns back to the operand slots
        for i, is_data in enumerate(_data_args):
            if is_data:
                _args[i] = _data[f"_arg{i}"]
        for key, is_data in _data_kwds.items():
            if is_data:
                kwds[key] = _data[key]

    # apply method and check that returned object is valid
    result = method(*_args, **kwds)
    if not isinstance(result, pd.Series):
        msg = f"Value returned by `{method.__name__}` cannot be cast to an IamDataFrame"
        raise ValueError(f"{msg}: {result}")

    # separate pint quantities into numerical value and unit (as index)
    if ignore_units is False:
        _value = pd.DataFrame(
            [[i.magnitude, "{:~}".format(i.units)] for i in result.values],
            columns=["value", "unit"],
            index=result.index,
        ).set_index("unit", append=True)
        # normalize pint's "dimensionless" to an empty-string unit
        _value.index = replace_index_values(_value, "unit", {"dimensionless": ""})

    # otherwise, set unit (as index) to "unknown" or the value given by "ignore_units"
    else:
        index = append_index_level(
            result.index,
            codes=0,
            level="unknown" if ignore_units is True else ignore_units,
            name="unit",
        )
        _value = pd.Series(result.values, index=index, name="value")

    # append the `name` to the index on the `axis`
    _value.index = append_index_level(_value.index, codes=0, level=name, name=axis)

    return _value
def test_replace_index_level_raises(test_df_index):
    """Assert that replace_index_value raises with non-existing level"""
    nonexistent_level = 'foo'
    with pytest.raises(KeyError):
        replace_index_values(test_df_index, nonexistent_level, {'scen_a': 'scen_c'})