def fix_BC_name(
    db_in,
    from_v="Effective Radiative Forcing|Anthropogenic|Albedo Change|Other|Deposition of Black Carbon on Snow",
    to_v="Effective Radiative Forcing|Anthropogenic|Other|BC on Snow",
    model="*OSCAR*",
):
    """
    Change a variable name in the database

    :param db_in: input database (ScmDataFrame)
    :param from_v: original variable name
    :param to_v: output variable name
    :param model: model filter (currently unused in the function body)
    :return: db with from_v changed to to_v
    """
    # Convert to dataframe:
    db = db_in.timeseries().reset_index()
    # Replace name:
    db["variable"] = db["variable"].apply(lambda x: to_v if x == from_v else x)
    # Convert back to ScmDataFrame
    db = ScmDataFrame(db)
    return db
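# Hedged usage sketch (not from the original source; `erf_db` is a hypothetical
# ScmDataFrame holding the RCMIP ERF submissions):
def _example_fix_bc_name_usage(erf_db):
    renamed = fix_BC_name(erf_db)
    # the long deposition-of-BC-on-snow name should no longer appear
    old_name = (
        "Effective Radiative Forcing|Anthropogenic|Albedo Change|Other|"
        "Deposition of Black Carbon on Snow"
    )
    assert old_name not in renamed["variable"].unique()
    return renamed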
def test_convert_scmdf_to_tuningstruc_single_char_unit(tmpdir):
    test_df = ScmDataFrame(
        np.array([1, 2, 3]),
        index=[dt.datetime(y, 1, 1) for y in [1990, 1991, 1992]],
        columns={
            "variable": "var",
            "region": "World",
            "unit": "K",
            "scenario": "test-scenario",
            "model": "test_model",
            "climate_model": "test_cm",
            "member_id": "tmember-id",
        },
    )

    convert_scmdf_to_tuningstruc(test_df, tmpdir, prefix="test_tuningstruc")
    expected_outfile = join(
        tmpdir, "test_tuningstruc_VAR_TEST-SCENARIO_TMEMBER-ID_WORLD.mat"
    )

    reread = convert_tuningstruc_to_scmdf(expected_outfile)

    assert (reread["unit"] == "K").all()
def _tuningstrucs_blended_model_wrangling_inner_loop(
    src, regexp_inner, dst, force, prefix
):
    collected = []
    for dirpath_inner, _, filenames_inner in walk(src):
        if filenames_inner:
            if not regexp_inner.match(dirpath_inner):
                continue

            openscmdf = df_append(
                [
                    load_scmdataframe(os.path.join(dirpath_inner, f))
                    for f in filenames_inner
                ]
            )
            tmp_ts = openscmdf.timeseries().reset_index()
            tmp_ts["unit"] = tmp_ts["unit"].astype(str)
            openscmdf = ScmDataFrame(tmp_ts)

            collected.append(openscmdf)

    convert_scmdf_to_tuningstruc(
        df_append(collected), dst, force=force, prefix=prefix
    )
def save_into_database(db, db_path, filename_leader):
    for cm in tqdm.tqdm_notebook(
        db["climatemodel"].unique(), leave=False, desc="Climate models"
    ):
        db_cm = db.filter(climatemodel=cm)

        for r in tqdm.tqdm_notebook(
            db_cm["region"].unique(), leave=False, desc="Regions"
        ):
            db_cm_r = db_cm.filter(region=r)

            for v in tqdm.tqdm_notebook(
                db_cm_r["variable"].unique(), leave=False, desc="Variables"
            ):
                db_cm_r_v = ScmDataFrame(db_cm_r.filter(variable=v))

                filename = get_filename(db_cm_r_v, leader=filename_leader)
                outfile = os.path.join(db_path, filename)

                convert_scmdf_to_pyamdf_year_only(db_cm_r_v).to_csv(outfile)
                logger.debug("saved file to {}".format(outfile))

    with open(os.path.join(db_path, "timestamp.txt"), "w") as fh:
        fh.write("database written at: ")
        fh.write(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        fh.write("\n")
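# Hedged usage sketch (assumed names, not from the original source): `db` is an
# ScmDataFrame with a "climatemodel" metadata column and `db_path` is an
# existing directory; one CSV per climate model/region/variable is written,
# plus a timestamp.txt marker.
def _example_save_into_database(db, db_path):
    save_into_database(db, db_path, filename_leader="rcmip")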
def read_cmip6_concs_gmnhsh(  # pylint:disable=too-many-locals
    filepath, region_coord_name="sector"
):
    """
    Read CMIP6 concentrations global and hemispheric mean data

    Parameters
    ----------
    filepath : str
        Filepath from which to read the data

    region_coord_name : str
        The name of the co-ordinate which represents the region in the datafile.

    Returns
    -------
    :obj:`ScmDataFrame`
        :obj:`ScmDataFrame` containing the global and hemispheric mean data

    Raises
    ------
    AssertionError
        Defensive assertion: the code is being used in an unexpected way
    """
    loaded_cube = iris.load_cube(filepath)
    checked_cube = _check_cube_and_adjust_if_needed(loaded_cube)

    region_map = {
        "GM": "World",
        "NH": "World|Northern Hemisphere",
        "SH": "World|Southern Hemisphere",
    }
    unit_map = {"1.e^-6": "ppm", "1.e^-9": "ppb", "1.e^-12": "ppt"}

    timeseries_cubes = {}
    for region_coord in checked_cube.coord(region_coord_name):
        if len(list(region_coord.cells())) != 1:  # pragma: no cover
            raise AssertionError("Should only have one point now")

        original_names = {
            int(v.split(":")[0].strip()): v.split(":")[1].strip()
            for v in region_coord.attributes["original_names"].split(";")
        }
        original_regions = {k: v.split("_")[-1] for k, v in original_names.items()}

        region_coord_point = region_coord.cell(0).point
        region = region_map[original_regions[region_coord_point]]

        if checked_cube.shape[1] != 3 or checked_cube.shape[0] == 3:
            raise AssertionError("cube data shape isn't as expected")

        checked_cube.attributes["variable"] = checked_cube.var_name
        checked_cube.attributes["variable_standard_name"] = checked_cube.standard_name
        checked_cube.attributes["region"] = region

        if checked_cube.attributes["source_id"].startswith("UoM-CMIP"):
            scenario = "historical"
            model = "unspecified"
        else:
            scenario = "-".join(
                "ssp{}".format(
                    checked_cube.attributes["source_id"].split("ssp")[1]
                ).split("-")[:-3]
            )
            model = (
                checked_cube.attributes["source_id"]
                .split("-ssp")[0]
                .replace("UoM-", "")
            )

        checked_cube.attributes["scenario"] = scenario
        checked_cube.attributes["model"] = model
        checked_cube.attributes["climate_model"] = "MAGICC7"
        checked_cube.attributes["member_id"] = "unspecified"

        helper_region = SCMCube()
        helper_region.cube = checked_cube[:, region_coord_point]
        helper_region.cube.remove_coord(region_coord_name)

        timeseries_cubes[region] = helper_region

    output = (
        helper_region.convert_scm_timeseries_cubes_to_openscmdata(timeseries_cubes)
        .timeseries()
        .reset_index()
    )
    output["unit"] = output["unit"].map(unit_map)
    output["model"] = model

    output = ScmDataFrame(output)

    return output
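# Hedged usage sketch (the filepath is illustrative, not a file shipped with
# this code): read a CMIP6 GMNHSH concentrations file and pull out the
# hemispheric means.
def _example_read_cmip6_concs_gmnhsh(filepath):
    concs = read_cmip6_concs_gmnhsh(filepath)
    # regions follow the mapping above: World plus the two hemispheres
    return concs.filter(region="World|*Hemisphere")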
def get_hfds_expected_results():
    sftof_fracs = 100 - SURFACE_FRACS
    ocean_weights = sftof_fracs * AREA_WEIGHTS

    world_values = np.sum(np.sum(RAW_DATA * ocean_weights, axis=2), axis=1) / np.sum(
        ocean_weights
    )
    world_ocean_values = world_values

    nh_area_weights = np.copy(AREA_WEIGHTS)
    nh_area_weights[2, :] = 0

    # we do these by hand: yes they're very slow but that's the point
    world_nh_ocean_values = np.array(
        [
            (30 * 100 + 40 * 70 + 50 * 100 + 60 * 90) * 1.2
            + (110 * 20 + 190 * 100 + 260 * 50) * 2,
            (0 * 100 + 15 * 70 + 45 * 100 + 90 * 90) * 1.2
            + (300 * 20 + 450 * 100 + 270 * 50) * 2,
            (60 * 100 + 120 * 70 + 60 * 100 + 60 * 90) * 1.2
            + (510 * 20 + 220 * 100 + 280 * 50) * 2,
        ]
    ) / ((100 + 70 + 100 + 90) * 1.2 + (20 + 100 + 50) * 2)
    world_nh_values = world_nh_ocean_values

    sh_area_weights = np.copy(AREA_WEIGHTS)
    sh_area_weights[:2, :] = 0

    world_sh_ocean_values = np.array(
        [
            (3 * 80 + 60 * 90 + 20 * 49 + 40 * 85) * 1.1,
            (10 * 80 + 70 * 90 + 90 * 49 + 130 * 85) * 1.1,
            (50 * 80 + 60 * 90 + 55 * 49 + 60 * 85) * 1.1,
        ]
    ) / ((80 + 90 + 49 + 85) * 1.1)
    world_sh_values = world_sh_ocean_values

    world_north_atlantic_values = np.array([260, 270, 280])
    world_elnino_values = np.array([190, 450, 220])

    data = np.vstack(
        [
            world_values,
            world_ocean_values,
            world_nh_values,
            world_sh_values,
            world_nh_ocean_values,
            world_sh_ocean_values,
            world_north_atlantic_values,
            world_elnino_values,
        ]
    ).T

    exp = ScmDataFrame(
        data=data,
        index=SCMDF_TIME,
        columns={
            "model": "unspecified",
            "scenario": "experiment",
            "region": [
                "World",
                "World|Ocean",
                "World|Northern Hemisphere",
                "World|Southern Hemisphere",
                "World|Northern Hemisphere|Ocean",
                "World|Southern Hemisphere|Ocean",
                "World|North Atlantic Ocean",
                "World|El Nino N3.4",
            ],
            "variable": "hfds",
            "unit": "W m^-2",
            "climate_model": "model",
            "activity_id": "cmip5",
            "member_id": "realisation",
            "variable_standard_name": "surface_downward_heat_flux_in_sea_water",
            "mip_era": "CMIP5",
        },
    )
    exp.metadata = {
        "calendar": "gregorian",
        "modeling_realm": "ocean",
        "Conventions": "CF-1.5",
        "crunch_source_files": "Files: ['/cmip5/experiment/Omon/hfds/model/realisation/hfds_Omon_model_experiment_realisation_185001-185003.nc']; sftof: ['/cmip5/experiment/fx/sftof/model/r0i0p0/sftof_fx_model_experiment_r0i0p0.nc']; areacello: ['/cmip5/experiment/fx/areacello/model/r0i0p0/areacello_fx_model_experiment_r0i0p0.nc']",
    }

    exp = _add_land_area_metadata(exp, realm="ocean")

    return exp
def get_gpp_expected_results():
    land_weights = SURFACE_FRACS * AREA_WEIGHTS

    world_values = np.sum(np.sum(RAW_DATA * land_weights, axis=2), axis=1) / np.sum(
        land_weights
    )
    world_land_values = world_values

    nh_area_weights = np.copy(AREA_WEIGHTS)
    nh_area_weights[2, :] = 0

    # we do these by hand: yes they're very slow but that's the point
    world_nh_land_values = np.array(
        [
            (40 * 30 + 60 * 10) * 1.2 + (110 * 80 + 120 * 100 + 260 * 50) * 2,
            (15 * 30 + 90 * 10) * 1.2 + (300 * 80 + 350 * 100 + 270 * 50) * 2,
            (120 * 30 + 60 * 10) * 1.2 + (510 * 80 + 432 * 100 + 280 * 50) * 2,
        ]
    ) / ((30 + 10) * 1.2 + (80 + 100 + 50) * 2)
    world_nh_values = world_nh_land_values

    sh_area_weights = np.copy(AREA_WEIGHTS)
    sh_area_weights[:2, :] = 0

    world_sh_land_values = np.array(
        [
            (3 * 20 + 60 * 10 + 20 * 51 + 40 * 15) * 1.1,
            (10 * 20 + 70 * 10 + 90 * 51 + 130 * 15) * 1.1,
            (50 * 20 + 60 * 10 + 55 * 51 + 60 * 15) * 1.1,
        ]
    ) / ((20 + 10 + 51 + 15) * 1.1)
    world_sh_values = world_sh_land_values

    data = np.vstack(
        [
            world_values,
            world_land_values,
            world_nh_values,
            world_sh_values,
            world_nh_land_values,
            world_sh_land_values,
        ]
    ).T

    exp = ScmDataFrame(
        data=data,
        index=SCMDF_TIME,
        columns={
            "model": "unspecified",
            "scenario": "experiment",
            "region": [
                "World",
                "World|Land",
                "World|Northern Hemisphere",
                "World|Southern Hemisphere",
                "World|Northern Hemisphere|Land",
                "World|Southern Hemisphere|Land",
            ],
            "variable": "gpp",
            "unit": "kg m^-2 s^-1",
            "climate_model": "model",
            "activity_id": "cmip5",
            "member_id": "realisation",
            "variable_standard_name": "gross_primary_productivity_of_carbon",
            "mip_era": "CMIP5",
        },
    )
    exp.metadata = {
        "calendar": "gregorian",
        "modeling_realm": "land",
        "Conventions": "CF-1.5",
        "crunch_source_files": "Files: ['/cmip5/experiment/Lmon/gpp/model/realisation/gpp_Lmon_model_experiment_realisation_185001-185003.nc']; sftlf: ['/cmip5/experiment/fx/sftlf/model/r0i0p0/sftlf_fx_model_experiment_r0i0p0.nc']; areacella: ['/cmip5/experiment/fx/areacella/model/r0i0p0/areacella_fx_model_experiment_r0i0p0.nc']",
    }

    exp = _add_land_area_metadata(exp, realm="land")

    return exp
def get_rsdt_expected_results():
    world_values = np.sum(np.sum(RAW_DATA * AREA_WEIGHTS, axis=2), axis=1) / np.sum(
        AREA_WEIGHTS
    )

    land_weights = SURFACE_FRACS * AREA_WEIGHTS
    world_land_values = np.sum(
        np.sum(RAW_DATA * land_weights, axis=2), axis=1
    ) / np.sum(land_weights)

    ocean_weights = (100 - SURFACE_FRACS) * AREA_WEIGHTS
    world_ocean_values = np.sum(
        np.sum(RAW_DATA * ocean_weights, axis=2), axis=1
    ) / np.sum(ocean_weights)

    nh_area_weights = np.copy(AREA_WEIGHTS)
    nh_area_weights[2, :] = 0
    world_nh_values = np.sum(
        np.sum(RAW_DATA * nh_area_weights, axis=2), axis=1
    ) / np.sum(nh_area_weights)

    sh_area_weights = np.copy(AREA_WEIGHTS)
    sh_area_weights[:2, :] = 0
    world_sh_values = np.sum(
        np.sum(RAW_DATA * sh_area_weights, axis=2), axis=1
    ) / np.sum(sh_area_weights)

    # we do these by hand: yes they're very slow but that's the point
    world_nh_land_values = np.array(
        [
            (40 * 30 + 60 * 10) * 1.2 + (110 * 80 + 120 * 100 + 260 * 50) * 2,
            (15 * 30 + 90 * 10) * 1.2 + (300 * 80 + 350 * 100 + 270 * 50) * 2,
            (120 * 30 + 60 * 10) * 1.2 + (510 * 80 + 432 * 100 + 280 * 50) * 2,
        ]
    ) / ((30 + 10) * 1.2 + (80 + 100 + 50) * 2)

    world_sh_land_values = np.array(
        [
            (3 * 20 + 60 * 10 + 20 * 51 + 40 * 15) * 1.1,
            (10 * 20 + 70 * 10 + 90 * 51 + 130 * 15) * 1.1,
            (50 * 20 + 60 * 10 + 55 * 51 + 60 * 15) * 1.1,
        ]
    ) / ((20 + 10 + 51 + 15) * 1.1)

    world_nh_ocean_values = np.array(
        [
            (30 * 100 + 40 * 70 + 50 * 100 + 60 * 90) * 1.2
            + (110 * 20 + 190 * 100 + 260 * 50) * 2,
            (0 * 100 + 15 * 70 + 45 * 100 + 90 * 90) * 1.2
            + (300 * 20 + 450 * 100 + 270 * 50) * 2,
            (60 * 100 + 120 * 70 + 60 * 100 + 60 * 90) * 1.2
            + (510 * 20 + 220 * 100 + 280 * 50) * 2,
        ]
    ) / ((100 + 70 + 100 + 90) * 1.2 + (20 + 100 + 50) * 2)

    world_sh_ocean_values = np.array(
        [
            (3 * 80 + 60 * 90 + 20 * 49 + 40 * 85) * 1.1,
            (10 * 80 + 70 * 90 + 90 * 49 + 130 * 85) * 1.1,
            (50 * 80 + 60 * 90 + 55 * 49 + 60 * 85) * 1.1,
        ]
    ) / ((80 + 90 + 49 + 85) * 1.1)

    world_north_atlantic_values = np.array([260, 270, 280])
    world_elnino_values = np.array([190, 450, 220])

    data = np.vstack(
        [
            world_values,
            world_land_values,
            world_ocean_values,
            world_nh_values,
            world_sh_values,
            world_nh_land_values,
            world_sh_land_values,
            world_nh_ocean_values,
            world_sh_ocean_values,
            world_north_atlantic_values,
            world_elnino_values,
        ]
    ).T

    exp = ScmDataFrame(
        data=data,
        index=SCMDF_TIME,
        columns={
            "model": "unspecified",
            "scenario": "experiment",
            "region": [
                "World",
                "World|Land",
                "World|Ocean",
                "World|Northern Hemisphere",
                "World|Southern Hemisphere",
                "World|Northern Hemisphere|Land",
                "World|Southern Hemisphere|Land",
                "World|Northern Hemisphere|Ocean",
                "World|Southern Hemisphere|Ocean",
                "World|North Atlantic Ocean",
                "World|El Nino N3.4",
            ],
            "variable": "rsdt",
            "unit": "W m^-2",
            "climate_model": "model",
            "activity_id": "cmip5",
            "member_id": "realisation",
            "variable_standard_name": "toa_incoming_shortwave_flux",
            "mip_era": "CMIP5",
        },
    )
    exp.metadata = {
        "calendar": "gregorian",
        "land_fraction": np.sum(AREA_WEIGHTS * SURFACE_FRACS)
        / (100 * np.sum(AREA_WEIGHTS)),
        "land_fraction_northern_hemisphere": np.sum(nh_area_weights * SURFACE_FRACS)
        / (100 * np.sum(nh_area_weights)),
        "land_fraction_southern_hemisphere": np.sum(sh_area_weights * SURFACE_FRACS)
        / (100 * np.sum(sh_area_weights)),
        "modeling_realm": "atmos",
        "Conventions": "CF-1.5",
        "crunch_source_files": "Files: ['/cmip5/experiment/Amon/rsdt/model/realisation/rsdt_Amon_model_experiment_realisation_185001-185003.nc']; areacella: ['/cmip5/experiment/fx/areacella/model/r0i0p0/areacella_fx_model_experiment_r0i0p0.nc']; sftlf: ['/cmip5/experiment/fx/sftlf/model/r0i0p0/sftlf_fx_model_experiment_r0i0p0.nc']",
    }

    exp = _add_land_area_metadata(exp, realm="atmos")

    return exp
if not os.path.isdir(OUTPUT_DATABASE_PATH):
    make_folders(OUTPUT_DATABASE_PATH)

if not os.path.isdir(OBS_DATABASE_PATH):
    make_folders(OBS_DATABASE_PATH)

# %% [markdown]
# ## Protocol

# %%
SCENARIO_PROTOCOL = os.path.join(
    INPUT_DATA_DIR, "data", "protocol", "rcmip-emissions-annual-means.csv"
)

# %%
protocol_db = ScmDataFrame(SCENARIO_PROTOCOL)
protocol_db.head()

# %%
protocol_db["scenario"].unique()

# %%
DATA_PROTOCOL = os.path.join(
    INPUT_DATA_DIR,
    "data",
    "submission-template",
    "rcmip-data-submission-template.xlsx",
)

# %%
protocol_variables = pd.read_excel(DATA_PROTOCOL,
import os.path

import numpy as np
import numpy.testing as npt
import pytest
from scmdata import ScmDataFrame

from fair.tools.scmdf import scmdf_to_emissions, _get_fair_col_unit_context

scenarios_to_test = ["ssp119", "ssp245", "ssp585"]
scenarios_to_test = ["ssp119"]

SCENARIOS = ScmDataFrame(
    os.path.join(os.path.dirname(__file__), "rcmip_scen_ssp_world_emissions.csv")
).filter(scenario=scenarios_to_test)

SSP245_EMMS = ScmDataFrame(
    os.path.join(
        os.path.dirname(__file__),
        "..",
        "..",
        "fair",
        "SSPs",
        "data",
        "rcmip-emissions-annual-means-4-0-0-ssp-only.csv",
    )
).filter(scenario="ssp245")

MODEL_SCEN_DFS = []
for scen_scmdf in SCENARIOS.groupby("scenario"):
    for scen_model_scmdf in scen_scmdf.groupby("model"):
        MODEL_SCEN_DFS.append(scen_model_scmdf)


@pytest.fixture(params=MODEL_SCEN_DFS)
def scen_model_scmdfs(request):
    yield request.param
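# A minimal sketch of how the fixture above might be consumed (hypothetical
# test, not part of the original suite): each parametrisation should hold
# exactly one scenario-model combination.
def test_scen_model_scmdfs_single_combination(scen_model_scmdfs):
    assert len(scen_model_scmdfs["scenario"].unique()) == 1
    assert len(scen_model_scmdfs["model"].unique()) == 1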
def convert_tuningstruc_to_scmdf(  # pylint:disable=too-many-arguments,too-many-locals
    filepath, variable=None, region=None, unit=None, scenario=None, model=None
):
    """
    Convert a matlab tuningstruc to an ScmDataFrame

    Parameters
    ----------
    filepath : str
        Filepath from which to load the data

    variable : str
        Name of the variable contained in the tuningstruc. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    region : str
        Region to which the data in the tuningstruc applies. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    unit : str
        Units of the data in the tuningstruc. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    scenario : str
        Scenario to which the data in the tuningstruc applies. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    model : str
        The (integrated assessment) model which generated the emissions
        scenario associated with the data in the tuningstruc. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file and if it cannot, it will be set to "unspecified".

    Raises
    ------
    KeyError
        If a metadata variable is not supplied and it cannot be determined
        from the tuningstruc.

    Returns
    -------
    :obj:`ScmDataFrame`
        ScmDataFrame with the tuningstruc data
    """
    dataset = mat4py.loadmat(filepath)

    for m, climate_model in enumerate(dataset["tuningdata"]["modelcodes"]):
        metadata = {
            "variable": [variable],
            "region": [region],
            "unit": [unit],
            "climate_model": [climate_model],
            "scenario": [scenario],
            "model": [model],
        }
        for k, v in metadata.items():
            if v == [None]:
                try:
                    metadata[k] = [dataset["tuningdata"]["model"][m][k]]
                except KeyError:
                    if k == "model":
                        metadata[k] = ["unspecified"]
                        continue

                    error_msg = "Cannot determine {} from file: {}".format(k, filepath)
                    raise KeyError(error_msg)

        data = np.asarray(dataset["tuningdata"]["model"][m]["data"])
        if len(data) != 2:
            data = data.T

        scmdf = ScmDataFrame(data=data[1], index=data[0], columns=metadata)
        try:
            ref_df.append(scmdf, inplace=True)
        except NameError:
            ref_df = scmdf

    return ref_df
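# Hedged usage sketch (the filepath and metadata values are illustrative):
# load a tuningstruc .mat file, overriding the metadata that cannot be read
# from the file itself.
def _example_convert_tuningstruc(filepath):
    return convert_tuningstruc_to_scmdf(
        filepath,
        variable="Surface Air Temperature Change",
        region="World",
        unit="K",
        scenario="historical",
    )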
def unify_units(in_df, protocol_variables, exc_info=False):
    out_df = in_df.copy()
    for variable in tqdm.tqdm_notebook(out_df["variable"].unique()):
        if variable.startswith("Radiative Forcing|Anthropogenic|Albedo Change"):
            target_unit = protocol_variables[
                protocol_variables["variable"]
                == "Radiative Forcing|Anthropogenic|Albedo Change"
            ]["unit"].iloc[0]
        elif variable.startswith(
            "Effective Radiative Forcing|Anthropogenic|Albedo Change"
        ):
            target_unit = protocol_variables[
                protocol_variables["variable"]
                == "Effective Radiative Forcing|Anthropogenic|Albedo Change"
            ]["unit"].iloc[0]
        elif variable.startswith("Carbon Pool"):
            target_unit = protocol_variables[
                protocol_variables["variable"] == "Carbon Pool|Atmosphere"
            ]["unit"].iloc[0]
        elif "Other" in variable:
            target_unit = protocol_variables[
                protocol_variables["variable"]
                == "{}".format(variable.split("|Other")[0])
            ]["unit"].iloc[0]
        elif any([variable.endswith(suf) for suf in ["quantile", "mean", "stddev"]]):
            try:
                target_unit = protocol_variables[
                    protocol_variables["variable"]
                    == "|".join(variable.split("|")[:-1])
                ]["unit"].iloc[0]
            except:
                logger.exception(
                    f"Failed to find unit for {variable}", exc_info=exc_info
                )
                continue
        else:
            try:
                target_unit = protocol_variables[
                    protocol_variables["variable"] == variable
                ]["unit"].iloc[0]
            except:
                logger.exception(
                    f"Failed to find unit for {variable}", exc_info=exc_info
                )
                continue

        try:
            if "CH4" in target_unit:
                out_df = out_df.convert_unit(
                    target_unit, variable=variable, context="CH4_conversions"
                )
                continue

            if "NOx" in target_unit:
                out_df = out_df.convert_unit(
                    target_unit, variable=variable, context="NOx_conversions"
                )
                continue

            if target_unit == "Dimensionless":
                target_unit = "dimensionless"

            out_df = out_df.convert_unit(target_unit, variable=variable)
        except:
            current_unit = out_df.filter(variable=variable)["unit"].unique()
            logger.exception(
                f"Failed for {variable} with target unit: {target_unit} "
                f"and current_unit: {current_unit}",
                exc_info=exc_info,
            )

    out_df = out_df.timeseries().reset_index()
    out_df["unit_context"] = out_df["unit_context"].fillna("not_required")
    return ScmDataFrame(out_df)
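# Hedged usage sketch (assumed names): `protocol_variables` is the table read
# from the RCMIP submission template, with "variable" and "unit" columns, and
# `db` is the ScmDataFrame whose units should be converted.
def _example_unify_units(db, protocol_variables):
    unified = unify_units(db, protocol_variables)
    # after conversion, each variable should carry the protocol's unit
    return unified["unit"].unique()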
# %%
quantile = "quantile"
relevant_files = [str(p) for p in relevant_files if quantile not in p]
print("Number of relevant files: {}".format(len(relevant_files)))
relevant_files

# %% [markdown]
# ### Read in all variables:

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
db = []
for rf in tqdm.tqdm_notebook(relevant_files):
    # print(rf.endswith('sf'))
    if rf.endswith(".csv"):
        loaded = ScmDataFrame(rf)
    else:
        loaded = ScmDataFrame(rf, sheet_name="your_data")

    db.append(
        loaded.filter(variable=variables_erf, scenario=scenarios_fl)
    )  # variables_of_interest))

print(db)
db = df_append(db).timeseries().reset_index()
db["unit"] = db["unit"].apply(
    lambda x: x.replace("Dimensionless", "dimensionless") if isinstance(x, str) else x
)
clear_output()
db = ScmDataFrame(db)
db.head()

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
db[variable].unique()