def test_combine_rows_sumall():
    """Check ``utils.combine_rows`` when called with ``sumall=False``.

    The fixture frame from ``combine_rows_df()`` is combined on the
    ``'region'`` level with ``'a'`` as the target and ``['b']`` as the
    source. The expected result holds three rows: ``sector1`` and
    ``extra_b`` under region ``'a'`` and ``sector1`` under region ``'c'``.
    """
    df = combine_rows_df()
    exp = pd.DataFrame({
        'sector': [
            'sector1', 'extra_b', 'sector1',
        ],
        'region': ['a', 'a', 'c'],
        '2010': [2.0, 21, 42],
        'foo': [2.0, 21, 42],
        'units': ['Mt'] * 3,
        'gas': ['BC'] * 3,
    }).set_index(utils.df_idx)

    obs = utils.combine_rows(df, 'region', 'a', ['b'], sumall=False)

    # ``DataFrame.reindex_axis`` no longer exists in current pandas;
    # ``reindex(columns=...)`` is the supported spelling and matches the
    # sibling default-arguments test.
    exp = exp.reindex(columns=obs.columns)

    def clean(frame):
        # Row order is irrelevant to the comparison, so sort on the index
        # and flatten it before asserting equality.
        return frame.sort_index().reset_index()

    pdt.assert_frame_equal(clean(obs), clean(exp))
def test_combine_rows_default():
    """Check ``utils.combine_rows`` using its default keyword arguments.

    The fixture frame is combined on the ``'region'`` level with ``'a'`` as
    the target and ``['b']`` as the source. Four rows are expected back:
    ``sector1``, ``sector2`` and ``extra_b`` under region ``'a'`` and
    ``sector1`` under region ``'c'``.
    """

    def _normalised(frame):
        # Compare independent of row order: sort on the index, then flatten.
        return frame.sort_index().reset_index()

    n_rows = 4
    expected_columns = {
        'sector': ['sector1', 'sector2', 'extra_b', 'sector1'],
        'region': ['a', 'a', 'a', 'c'],
        '2010': [3.0, 4.0, 21.0, 42.0],
        'foo': [1.0, -4.0, 21.0, 42.0],
        'units': ['Mt'] * n_rows,
        'gas': ['BC'] * n_rows,
    }
    expected = pd.DataFrame(expected_columns).set_index(utils.df_idx)

    observed = utils.combine_rows(combine_rows_df(), 'region', 'a', ['b'])

    # Align the expected column order with whatever the function returned.
    expected = expected.reindex(columns=observed.columns)

    pdt.assert_frame_equal(_normalised(observed), _normalised(expected))
def _harmonize_regions(config, prefix, suffix, regions, hist, model,
                       overrides, base_year, add_5regions):
    """Harmonize regional model data against history and rebuild aggregates.

    Steps, in order: clean ``model`` and ``hist`` (subtract regions from
    World, remove recalculated sectors, drop rows without any positive
    value), harmonize with ``Harmonizer``, recompute the sector totals,
    recompute the ``'World'`` rows from the regional rows, and optionally
    append 5-region aggregates. Duplicate index entries are dropped at the
    end, keeping the first occurrence.

    Parameters
    ----------
    config :
        Passed through to ``Harmonizer`` as its ``config`` argument.
    prefix, suffix : str
        Passed to ``utils.remove_recalculated_sectors``; also joined with
        ``'|'`` to form the name of the totals sector.
    regions : pandas.DataFrame
        Region table with a ``'Native Region Code'`` column; used as the
        mapping for ``utils.agg_regions`` (``'Native Region Code'`` ->
        ``'5_region'``). Only read when ``add_5regions`` is true.
    hist, model : pandas.DataFrame
        Historical and model data to harmonize.
    overrides : pandas.DataFrame or None
        Optional method overrides forwarded to the harmonizer; logged when
        not ``None``.
    base_year :
        Passed to ``utils.subtract_regions_from_world``.
    add_5regions : bool
        When true, append aggregated 5-region rows to the result.

    Returns
    -------
    tuple
        ``(model, metadata)`` where ``model`` is the harmonized frame and
        ``metadata`` is the value of ``harmonizer.metadata()``.

    Raises
    ------
    RuntimeError
        If ``model`` is empty after the cleaning step.
    AssertionError
        If null values are present after appending the 5-region rows.
    """
    # clean model
    model = utils.subtract_regions_from_world(model, 'model', base_year)
    model = utils.remove_recalculated_sectors(model, prefix, suffix)
    # remove rows with all 0s (strictly: rows with no value > 0)
    model = model[(model.T > 0).any()]

    # clean hist
    hist = utils.subtract_regions_from_world(hist, 'hist', base_year)
    hist = utils.remove_recalculated_sectors(hist, prefix, suffix)
    # remove rows with all 0s (strictly: rows with no value > 0)
    hist = hist[(hist.T > 0).any()]

    if model.empty:
        raise RuntimeError(
            'Model is empty after downselecting regional values')

    # harmonize
    utils.check_null(model, 'model')
    utils.check_null(hist, 'hist', fail=True)
    harmonizer = Harmonizer(model, hist, config=config)
    _log('Harmonizing (with example methods):')
    _log(harmonizer.methods(overrides=overrides).head())
    if overrides is not None:
        _log('and override methods:')
        _log(overrides.head())
    model = harmonizer.harmonize(overrides=overrides)
    utils.check_null(model, 'model')
    metadata = harmonizer.metadata()

    # add aggregate variables
    totals = '|'.join([prefix, suffix])
    if model.index.get_level_values('sector').isin([totals]).any():
        msg = 'Removing sector aggregates. Recalculating with harmonized totals.'
        _warn(msg)
        model.drop(totals, level='sector', inplace=True)
    model = (
        utils.EmissionsAggregator(model)
        .add_variables(totals=totals, aggregates=False)
        .df
        .set_index(utils.df_idx)
    )
    utils.check_null(model, 'model')

    # combine regional values to send back into template form
    model = model.reset_index().set_index(utils.df_idx).sort_index()
    glb = utils.combine_rows(model, 'region', 'World',
                             sumall=True, rowsonly=True)
    # freshly combined World rows take precedence over existing ones
    model = glb.combine_first(model)

    # add 5regions
    if add_5regions:
        _log('Adding 5region values')
        # explicitly don't add World, it already exists from aggregation
        mapping = regions[regions['Native Region Code'] != 'World'].copy()
        aggdf = utils.agg_regions(model, mapping=mapping,
                                  rfrom='Native Region Code', rto='5_region')
        # DataFrame.append was removed in pandas 2.0; concat is the
        # equivalent row-wise stacking with the same defaults.
        model = pd.concat([model, aggdf])
        assert not model.isnull().values.any()

    # duplicates come in from World and World being translated
    duplicates = model.index.duplicated(keep='first')
    if duplicates.any():
        # separate name so the ``regions`` argument is not shadowed
        duplicated_regions = (
            model[duplicates].index.get_level_values('region').unique()
        )
        msg = 'Dropping duplicate rows found for regions: {}'.format(
            duplicated_regions)
        _warn(msg)
        model = model[~duplicates]

    return model, metadata