def fix_BC_name(
    db_in,
    from_v="Effective Radiative Forcing|Anthropogenic|Albedo Change|Other|Deposition of Black Carbon on Snow",
    to_v="Effective Radiative Forcing|Anthropogenic|Other|BC on Snow",
    model="*OSCAR*",
):
    """
    Change a variable name in the database

    :param db_in: input database (ScmDataFrame)
    :param from_v: original variable name
    :param to_v: output variable name
    :param model: model filter (currently unused in the function body)
    :return: db with from_v changed to to_v
    """
    # Convert to dataframe:
    db = db_in.timeseries().reset_index()
    # Replace name:
    db["variable"] = db["variable"].apply(lambda x: to_v if x == from_v else x)
    # Convert back to ScmDataFrame
    db = ScmDataFrame(db)
    return db
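# Hedged usage sketch (not from the original source; `erf_db` is a hypothetical
# ScmDataFrame holding the RCMIP ERF submissions):
def _example_fix_bc_name_usage(erf_db):
    renamed = fix_BC_name(erf_db)
    # the long deposition-of-BC-on-snow name should no longer appear
    old_name = (
        "Effective Radiative Forcing|Anthropogenic|Albedo Change|Other|"
        "Deposition of Black Carbon on Snow"
    )
    assert old_name not in renamed["variable"].unique()
    return renamed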
def test_convert_scmdf_to_tuningstruc_single_char_unit(tmpdir):
    test_df = ScmDataFrame(
        np.array([1, 2, 3]),
        index=[dt.datetime(y, 1, 1) for y in [1990, 1991, 1992]],
        columns={
            "variable": "var",
            "region": "World",
            "unit": "K",
            "scenario": "test-scenario",
            "model": "test_model",
            "climate_model": "test_cm",
            "member_id": "tmember-id",
        },
    )

    convert_scmdf_to_tuningstruc(test_df, tmpdir, prefix="test_tuningstruc")
    expected_outfile = join(
        tmpdir, "test_tuningstruc_VAR_TEST-SCENARIO_TMEMBER-ID_WORLD.mat"
    )

    reread = convert_tuningstruc_to_scmdf(expected_outfile)

    assert (reread["unit"] == "K").all()
def _tuningstrucs_blended_model_wrangling_inner_loop(
    src, regexp_inner, dst, force, prefix
):
    collected = []
    for dirpath_inner, _, filenames_inner in walk(src):
        if filenames_inner:
            if not regexp_inner.match(dirpath_inner):
                continue

            openscmdf = df_append(
                [
                    load_scmdataframe(os.path.join(dirpath_inner, f))
                    for f in filenames_inner
                ]
            )
            tmp_ts = openscmdf.timeseries().reset_index()
            tmp_ts["unit"] = tmp_ts["unit"].astype(str)
            openscmdf = ScmDataFrame(tmp_ts)

            collected.append(openscmdf)

    convert_scmdf_to_tuningstruc(
        df_append(collected), dst, force=force, prefix=prefix
    )
def save_into_database(db, db_path, filename_leader):
    for cm in tqdm.tqdm_notebook(
        db["climatemodel"].unique(), leave=False, desc="Climate models"
    ):
        db_cm = db.filter(climatemodel=cm)

        for r in tqdm.tqdm_notebook(
            db_cm["region"].unique(), leave=False, desc="Regions"
        ):
            db_cm_r = db_cm.filter(region=r)

            for v in tqdm.tqdm_notebook(
                db_cm_r["variable"].unique(), leave=False, desc="Variables"
            ):
                db_cm_r_v = ScmDataFrame(db_cm_r.filter(variable=v))

                filename = get_filename(db_cm_r_v, leader=filename_leader)
                outfile = os.path.join(db_path, filename)

                convert_scmdf_to_pyamdf_year_only(db_cm_r_v).to_csv(outfile)
                logger.debug("saved file to {}".format(outfile))

    with open(os.path.join(db_path, "timestamp.txt"), "w") as fh:
        fh.write("database written at: ")
        fh.write(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        fh.write("\n")
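# Hedged usage sketch (assumed names, not from the original source): `db` is an
# ScmDataFrame with a "climatemodel" metadata column and `db_path` is an
# existing directory; one CSV per climate model/region/variable is written,
# plus a timestamp.txt marker.
def _example_save_into_database(db, db_path):
    save_into_database(db, db_path, filename_leader="rcmip")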
def read_cmip6_concs_gmnhsh(  # pylint:disable=too-many-locals
    filepath, region_coord_name="sector"
):
    """
    Read CMIP6 concentrations global and hemispheric mean data

    Parameters
    ----------
    filepath : str
        Filepath from which to read the data

    region_coord_name : str
        The name of the co-ordinate which represents the region in the datafile.

    Returns
    -------
    :obj:`ScmDataFrame`
        :obj:`ScmDataFrame` containing the global and hemispheric mean data

    Raises
    ------
    AssertionError
        Defensive assertion: the code is being used in an unexpected way
    """
    loaded_cube = iris.load_cube(filepath)
    checked_cube = _check_cube_and_adjust_if_needed(loaded_cube)

    region_map = {
        "GM": "World",
        "NH": "World|Northern Hemisphere",
        "SH": "World|Southern Hemisphere",
    }
    unit_map = {"1.e^-6": "ppm", "1.e^-9": "ppb", "1.e^-12": "ppt"}

    timeseries_cubes = {}
    for region_coord in checked_cube.coord(region_coord_name):
        if len(list(region_coord.cells())) != 1:  # pragma: no cover
            raise AssertionError("Should only have one point now")

        original_names = {
            int(v.split(":")[0].strip()): v.split(":")[1].strip()
            for v in region_coord.attributes["original_names"].split(";")
        }
        original_regions = {k: v.split("_")[-1] for k, v in original_names.items()}

        region_coord_point = region_coord.cell(0).point
        region = region_map[original_regions[region_coord_point]]

        if checked_cube.shape[1] != 3 or checked_cube.shape[0] == 3:
            raise AssertionError("cube data shape isn't as expected")

        checked_cube.attributes["variable"] = checked_cube.var_name
        checked_cube.attributes["variable_standard_name"] = checked_cube.standard_name
        checked_cube.attributes["region"] = region

        if checked_cube.attributes["source_id"].startswith("UoM-CMIP"):
            scenario = "historical"
            model = "unspecified"
        else:
            scenario = "-".join(
                "ssp{}".format(
                    checked_cube.attributes["source_id"].split("ssp")[1]
                ).split("-")[:-3]
            )
            model = (
                checked_cube.attributes["source_id"]
                .split("-ssp")[0]
                .replace("UoM-", "")
            )

        checked_cube.attributes["scenario"] = scenario
        checked_cube.attributes["model"] = model
        checked_cube.attributes["climate_model"] = "MAGICC7"
        checked_cube.attributes["member_id"] = "unspecified"

        helper_region = SCMCube()
        helper_region.cube = checked_cube[:, region_coord_point]
        helper_region.cube.remove_coord(region_coord_name)

        timeseries_cubes[region] = helper_region

    output = (
        helper_region.convert_scm_timeseries_cubes_to_openscmdata(timeseries_cubes)
        .timeseries()
        .reset_index()
    )
    output["unit"] = output["unit"].map(unit_map)
    output["model"] = model

    output = ScmDataFrame(output)

    return output
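# Hedged usage sketch (the filepath is illustrative, not a file shipped with
# this code): read a CMIP6 GMNHSH concentrations file and pull out the
# hemispheric means.
def _example_read_cmip6_concs_gmnhsh(filepath):
    concs = read_cmip6_concs_gmnhsh(filepath)
    # regions follow the mapping above: World plus the two hemispheres
    return concs.filter(region="World|*Hemisphere")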
def get_hfds_expected_results():
    sftof_fracs = 100 - SURFACE_FRACS
    ocean_weights = sftof_fracs * AREA_WEIGHTS

    world_values = np.sum(np.sum(RAW_DATA * ocean_weights, axis=2), axis=1) / np.sum(
        ocean_weights
    )
    world_ocean_values = world_values

    nh_area_weights = np.copy(AREA_WEIGHTS)
    nh_area_weights[2, :] = 0

    # we do these by hand: yes they're very slow but that's the point
    world_nh_ocean_values = np.array(
        [
            (30 * 100 + 40 * 70 + 50 * 100 + 60 * 90) * 1.2
            + (110 * 20 + 190 * 100 + 260 * 50) * 2,
            (0 * 100 + 15 * 70 + 45 * 100 + 90 * 90) * 1.2
            + (300 * 20 + 450 * 100 + 270 * 50) * 2,
            (60 * 100 + 120 * 70 + 60 * 100 + 60 * 90) * 1.2
            + (510 * 20 + 220 * 100 + 280 * 50) * 2,
        ]
    ) / ((100 + 70 + 100 + 90) * 1.2 + (20 + 100 + 50) * 2)
    world_nh_values = world_nh_ocean_values

    sh_area_weights = np.copy(AREA_WEIGHTS)
    sh_area_weights[:2, :] = 0

    world_sh_ocean_values = np.array(
        [
            (3 * 80 + 60 * 90 + 20 * 49 + 40 * 85) * 1.1,
            (10 * 80 + 70 * 90 + 90 * 49 + 130 * 85) * 1.1,
            (50 * 80 + 60 * 90 + 55 * 49 + 60 * 85) * 1.1,
        ]
    ) / ((80 + 90 + 49 + 85) * 1.1)
    world_sh_values = world_sh_ocean_values

    world_north_atlantic_values = np.array([260, 270, 280])
    world_elnino_values = np.array([190, 450, 220])

    data = np.vstack(
        [
            world_values,
            world_ocean_values,
            world_nh_values,
            world_sh_values,
            world_nh_ocean_values,
            world_sh_ocean_values,
            world_north_atlantic_values,
            world_elnino_values,
        ]
    ).T

    exp = ScmDataFrame(
        data=data,
        index=SCMDF_TIME,
        columns={
            "model": "unspecified",
            "scenario": "experiment",
            "region": [
                "World",
                "World|Ocean",
                "World|Northern Hemisphere",
                "World|Southern Hemisphere",
                "World|Northern Hemisphere|Ocean",
                "World|Southern Hemisphere|Ocean",
                "World|North Atlantic Ocean",
                "World|El Nino N3.4",
            ],
            "variable": "hfds",
            "unit": "W m^-2",
            "climate_model": "model",
            "activity_id": "cmip5",
            "member_id": "realisation",
            "variable_standard_name": "surface_downward_heat_flux_in_sea_water",
            "mip_era": "CMIP5",
        },
    )
    exp.metadata = {
        "calendar": "gregorian",
        "modeling_realm": "ocean",
        "Conventions": "CF-1.5",
        "crunch_source_files": "Files: ['/cmip5/experiment/Omon/hfds/model/realisation/hfds_Omon_model_experiment_realisation_185001-185003.nc']; sftof: ['/cmip5/experiment/fx/sftof/model/r0i0p0/sftof_fx_model_experiment_r0i0p0.nc']; areacello: ['/cmip5/experiment/fx/areacello/model/r0i0p0/areacello_fx_model_experiment_r0i0p0.nc']",
    }

    exp = _add_land_area_metadata(exp, realm="ocean")

    return exp
def get_gpp_expected_results():
    land_weights = SURFACE_FRACS * AREA_WEIGHTS

    world_values = np.sum(np.sum(RAW_DATA * land_weights, axis=2), axis=1) / np.sum(
        land_weights
    )
    world_land_values = world_values

    nh_area_weights = np.copy(AREA_WEIGHTS)
    nh_area_weights[2, :] = 0

    # we do these by hand: yes they're very slow but that's the point
    world_nh_land_values = np.array(
        [
            (40 * 30 + 60 * 10) * 1.2 + (110 * 80 + 120 * 100 + 260 * 50) * 2,
            (15 * 30 + 90 * 10) * 1.2 + (300 * 80 + 350 * 100 + 270 * 50) * 2,
            (120 * 30 + 60 * 10) * 1.2 + (510 * 80 + 432 * 100 + 280 * 50) * 2,
        ]
    ) / ((30 + 10) * 1.2 + (80 + 100 + 50) * 2)
    world_nh_values = world_nh_land_values

    sh_area_weights = np.copy(AREA_WEIGHTS)
    sh_area_weights[:2, :] = 0

    world_sh_land_values = np.array(
        [
            (3 * 20 + 60 * 10 + 20 * 51 + 40 * 15) * 1.1,
            (10 * 20 + 70 * 10 + 90 * 51 + 130 * 15) * 1.1,
            (50 * 20 + 60 * 10 + 55 * 51 + 60 * 15) * 1.1,
        ]
    ) / ((20 + 10 + 51 + 15) * 1.1)
    world_sh_values = world_sh_land_values

    data = np.vstack(
        [
            world_values,
            world_land_values,
            world_nh_values,
            world_sh_values,
            world_nh_land_values,
            world_sh_land_values,
        ]
    ).T

    exp = ScmDataFrame(
        data=data,
        index=SCMDF_TIME,
        columns={
            "model": "unspecified",
            "scenario": "experiment",
            "region": [
                "World",
                "World|Land",
                "World|Northern Hemisphere",
                "World|Southern Hemisphere",
                "World|Northern Hemisphere|Land",
                "World|Southern Hemisphere|Land",
            ],
            "variable": "gpp",
            "unit": "kg m^-2 s^-1",
            "climate_model": "model",
            "activity_id": "cmip5",
            "member_id": "realisation",
            "variable_standard_name": "gross_primary_productivity_of_carbon",
            "mip_era": "CMIP5",
        },
    )
    exp.metadata = {
        "calendar": "gregorian",
        "modeling_realm": "land",
        "Conventions": "CF-1.5",
        "crunch_source_files": "Files: ['/cmip5/experiment/Lmon/gpp/model/realisation/gpp_Lmon_model_experiment_realisation_185001-185003.nc']; sftlf: ['/cmip5/experiment/fx/sftlf/model/r0i0p0/sftlf_fx_model_experiment_r0i0p0.nc']; areacella: ['/cmip5/experiment/fx/areacella/model/r0i0p0/areacella_fx_model_experiment_r0i0p0.nc']",
    }

    exp = _add_land_area_metadata(exp, realm="land")

    return exp
def get_rsdt_expected_results():
    world_values = np.sum(np.sum(RAW_DATA * AREA_WEIGHTS, axis=2), axis=1) / np.sum(
        AREA_WEIGHTS
    )

    land_weights = SURFACE_FRACS * AREA_WEIGHTS
    world_land_values = np.sum(
        np.sum(RAW_DATA * land_weights, axis=2), axis=1
    ) / np.sum(land_weights)

    ocean_weights = (100 - SURFACE_FRACS) * AREA_WEIGHTS
    world_ocean_values = np.sum(
        np.sum(RAW_DATA * ocean_weights, axis=2), axis=1
    ) / np.sum(ocean_weights)

    nh_area_weights = np.copy(AREA_WEIGHTS)
    nh_area_weights[2, :] = 0
    world_nh_values = np.sum(
        np.sum(RAW_DATA * nh_area_weights, axis=2), axis=1
    ) / np.sum(nh_area_weights)

    sh_area_weights = np.copy(AREA_WEIGHTS)
    sh_area_weights[:2, :] = 0
    world_sh_values = np.sum(
        np.sum(RAW_DATA * sh_area_weights, axis=2), axis=1
    ) / np.sum(sh_area_weights)

    # we do these by hand: yes they're very slow but that's the point
    world_nh_land_values = np.array(
        [
            (40 * 30 + 60 * 10) * 1.2 + (110 * 80 + 120 * 100 + 260 * 50) * 2,
            (15 * 30 + 90 * 10) * 1.2 + (300 * 80 + 350 * 100 + 270 * 50) * 2,
            (120 * 30 + 60 * 10) * 1.2 + (510 * 80 + 432 * 100 + 280 * 50) * 2,
        ]
    ) / ((30 + 10) * 1.2 + (80 + 100 + 50) * 2)

    world_sh_land_values = np.array(
        [
            (3 * 20 + 60 * 10 + 20 * 51 + 40 * 15) * 1.1,
            (10 * 20 + 70 * 10 + 90 * 51 + 130 * 15) * 1.1,
            (50 * 20 + 60 * 10 + 55 * 51 + 60 * 15) * 1.1,
        ]
    ) / ((20 + 10 + 51 + 15) * 1.1)

    world_nh_ocean_values = np.array(
        [
            (30 * 100 + 40 * 70 + 50 * 100 + 60 * 90) * 1.2
            + (110 * 20 + 190 * 100 + 260 * 50) * 2,
            (0 * 100 + 15 * 70 + 45 * 100 + 90 * 90) * 1.2
            + (300 * 20 + 450 * 100 + 270 * 50) * 2,
            (60 * 100 + 120 * 70 + 60 * 100 + 60 * 90) * 1.2
            + (510 * 20 + 220 * 100 + 280 * 50) * 2,
        ]
    ) / ((100 + 70 + 100 + 90) * 1.2 + (20 + 100 + 50) * 2)

    world_sh_ocean_values = np.array(
        [
            (3 * 80 + 60 * 90 + 20 * 49 + 40 * 85) * 1.1,
            (10 * 80 + 70 * 90 + 90 * 49 + 130 * 85) * 1.1,
            (50 * 80 + 60 * 90 + 55 * 49 + 60 * 85) * 1.1,
        ]
    ) / ((80 + 90 + 49 + 85) * 1.1)

    world_north_atlantic_values = np.array([260, 270, 280])
    world_elnino_values = np.array([190, 450, 220])

    data = np.vstack(
        [
            world_values,
            world_land_values,
            world_ocean_values,
            world_nh_values,
            world_sh_values,
            world_nh_land_values,
            world_sh_land_values,
            world_nh_ocean_values,
            world_sh_ocean_values,
            world_north_atlantic_values,
            world_elnino_values,
        ]
    ).T

    exp = ScmDataFrame(
        data=data,
        index=SCMDF_TIME,
        columns={
            "model": "unspecified",
            "scenario": "experiment",
            "region": [
                "World",
                "World|Land",
                "World|Ocean",
                "World|Northern Hemisphere",
                "World|Southern Hemisphere",
                "World|Northern Hemisphere|Land",
                "World|Southern Hemisphere|Land",
                "World|Northern Hemisphere|Ocean",
                "World|Southern Hemisphere|Ocean",
                "World|North Atlantic Ocean",
                "World|El Nino N3.4",
            ],
            "variable": "rsdt",
            "unit": "W m^-2",
            "climate_model": "model",
            "activity_id": "cmip5",
            "member_id": "realisation",
            "variable_standard_name": "toa_incoming_shortwave_flux",
            "mip_era": "CMIP5",
        },
    )
    exp.metadata = {
        "calendar": "gregorian",
        "land_fraction": np.sum(AREA_WEIGHTS * SURFACE_FRACS)
        / (100 * np.sum(AREA_WEIGHTS)),
        "land_fraction_northern_hemisphere": np.sum(nh_area_weights * SURFACE_FRACS)
        / (100 * np.sum(nh_area_weights)),
        "land_fraction_southern_hemisphere": np.sum(sh_area_weights * SURFACE_FRACS)
        / (100 * np.sum(sh_area_weights)),
        "modeling_realm": "atmos",
        "Conventions": "CF-1.5",
        "crunch_source_files": "Files: ['/cmip5/experiment/Amon/rsdt/model/realisation/rsdt_Amon_model_experiment_realisation_185001-185003.nc']; areacella: ['/cmip5/experiment/fx/areacella/model/r0i0p0/areacella_fx_model_experiment_r0i0p0.nc']; sftlf: ['/cmip5/experiment/fx/sftlf/model/r0i0p0/sftlf_fx_model_experiment_r0i0p0.nc']",
    }

    exp = _add_land_area_metadata(exp, realm="atmos")

    return exp
if not os.path.isdir(OUTPUT_DATABASE_PATH):
    make_folders(OUTPUT_DATABASE_PATH)

if not os.path.isdir(OBS_DATABASE_PATH):
    make_folders(OBS_DATABASE_PATH)

# %% [markdown]
# ## Protocol

# %%
SCENARIO_PROTOCOL = os.path.join(
    INPUT_DATA_DIR, "data", "protocol", "rcmip-emissions-annual-means.csv"
)

# %%
protocol_db = ScmDataFrame(SCENARIO_PROTOCOL)
protocol_db.head()

# %%
protocol_db["scenario"].unique()

# %%
DATA_PROTOCOL = os.path.join(
    INPUT_DATA_DIR,
    "data",
    "submission-template",
    "rcmip-data-submission-template.xlsx",
)

# %%
protocol_variables = pd.read_excel(DATA_PROTOCOL,
import os.path

import numpy as np
import numpy.testing as npt
import pytest
from scmdata import ScmDataFrame

from fair.tools.scmdf import scmdf_to_emissions, _get_fair_col_unit_context

scenarios_to_test = ["ssp119", "ssp245", "ssp585"]
scenarios_to_test = ["ssp119"]

SCENARIOS = ScmDataFrame(
    os.path.join(os.path.dirname(__file__), "rcmip_scen_ssp_world_emissions.csv")
).filter(scenario=scenarios_to_test)

SSP245_EMMS = ScmDataFrame(
    os.path.join(
        os.path.dirname(__file__),
        "..",
        "..",
        "fair",
        "SSPs",
        "data",
        "rcmip-emissions-annual-means-4-0-0-ssp-only.csv",
    )
).filter(scenario="ssp245")

MODEL_SCEN_DFS = []
for scen_scmdf in SCENARIOS.groupby("scenario"):
    for scen_model_scmdf in scen_scmdf.groupby("model"):
        MODEL_SCEN_DFS.append(scen_model_scmdf)


@pytest.fixture(params=MODEL_SCEN_DFS)
def scen_model_scmdfs(request):
    yield request.param
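# A minimal sketch of how the fixture above might be consumed (hypothetical
# test, not part of the original suite): each parametrisation should hold
# exactly one scenario-model combination.
def test_scen_model_scmdfs_single_combination(scen_model_scmdfs):
    assert len(scen_model_scmdfs["scenario"].unique()) == 1
    assert len(scen_model_scmdfs["model"].unique()) == 1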
def convert_tuningstruc_to_scmdf(  # pylint:disable=too-many-arguments,too-many-locals
    filepath, variable=None, region=None, unit=None, scenario=None, model=None
):
    """
    Convert a matlab tuningstruc to an ScmDataFrame

    Parameters
    ----------
    filepath : str
        Filepath from which to load the data

    variable : str
        Name of the variable contained in the tuningstruc. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    region : str
        Region to which the data in the tuningstruc applies. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    unit : str
        Units of the data in the tuningstruc. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    scenario : str
        Scenario to which the data in the tuningstruc applies. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file.

    model : str
        The (integrated assessment) model which generated the emissions
        scenario associated with the data in the tuningstruc. If None,
        `convert_tuningstruc_to_scmdf` will attempt to determine it from the
        input file and if it cannot, it will be set to "unspecified".

    Raises
    ------
    KeyError
        If a metadata variable is not supplied and it cannot be determined
        from the tuningstruc.

    Returns
    -------
    :obj:`ScmDataFrame`
        ScmDataFrame with the tuningstruc data
    """
    dataset = mat4py.loadmat(filepath)

    for m, climate_model in enumerate(dataset["tuningdata"]["modelcodes"]):
        metadata = {
            "variable": [variable],
            "region": [region],
            "unit": [unit],
            "climate_model": [climate_model],
            "scenario": [scenario],
            "model": [model],
        }
        for k, v in metadata.items():
            if v == [None]:
                try:
                    metadata[k] = [dataset["tuningdata"]["model"][m][k]]
                except KeyError:
                    if k == "model":
                        metadata[k] = ["unspecified"]
                        continue

                    error_msg = "Cannot determine {} from file: {}".format(k, filepath)
                    raise KeyError(error_msg)

        data = np.asarray(dataset["tuningdata"]["model"][m]["data"])
        if len(data) != 2:
            data = data.T

        scmdf = ScmDataFrame(data=data[1], index=data[0], columns=metadata)
        try:
            ref_df.append(scmdf, inplace=True)
        except NameError:
            ref_df = scmdf

    return ref_df
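# Hedged usage sketch (the filepath and metadata values are illustrative):
# load a tuningstruc .mat file, overriding the metadata that cannot be read
# from the file itself.
def _example_convert_tuningstruc(filepath):
    return convert_tuningstruc_to_scmdf(
        filepath,
        variable="Surface Air Temperature Change",
        region="World",
        unit="K",
        scenario="historical",
    )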
def unify_units(in_df, protocol_variables, exc_info=False):
    out_df = in_df.copy()
    for variable in tqdm.tqdm_notebook(out_df["variable"].unique()):
        if variable.startswith("Radiative Forcing|Anthropogenic|Albedo Change"):
            target_unit = protocol_variables[
                protocol_variables["variable"]
                == "Radiative Forcing|Anthropogenic|Albedo Change"
            ]["unit"].iloc[0]
        elif variable.startswith(
            "Effective Radiative Forcing|Anthropogenic|Albedo Change"
        ):
            target_unit = protocol_variables[
                protocol_variables["variable"]
                == "Effective Radiative Forcing|Anthropogenic|Albedo Change"
            ]["unit"].iloc[0]
        elif variable.startswith("Carbon Pool"):
            target_unit = protocol_variables[
                protocol_variables["variable"] == "Carbon Pool|Atmosphere"
            ]["unit"].iloc[0]
        elif "Other" in variable:
            target_unit = protocol_variables[
                protocol_variables["variable"]
                == "{}".format(variable.split("|Other")[0])
            ]["unit"].iloc[0]
        elif any([variable.endswith(suf) for suf in ["quantile", "mean", "stddev"]]):
            try:
                target_unit = protocol_variables[
                    protocol_variables["variable"]
                    == "|".join(variable.split("|")[:-1])
                ]["unit"].iloc[0]
            except:
                logger.exception(
                    f"Failed to find unit for {variable}", exc_info=exc_info
                )
                continue
        else:
            try:
                target_unit = protocol_variables[
                    protocol_variables["variable"] == variable
                ]["unit"].iloc[0]
            except:
                logger.exception(
                    f"Failed to find unit for {variable}", exc_info=exc_info
                )
                continue

        try:
            if "CH4" in target_unit:
                out_df = out_df.convert_unit(
                    target_unit, variable=variable, context="CH4_conversions"
                )
                continue

            if "NOx" in target_unit:
                out_df = out_df.convert_unit(
                    target_unit, variable=variable, context="NOx_conversions"
                )
                continue

            if target_unit == "Dimensionless":
                target_unit = "dimensionless"

            out_df = out_df.convert_unit(target_unit, variable=variable)
        except:
            current_unit = out_df.filter(variable=variable)["unit"].unique()
            logger.exception(
                f"Failed for {variable} with target unit: {target_unit} "
                f"and current_unit: {current_unit}",
                exc_info=exc_info,
            )

    out_df = out_df.timeseries().reset_index()
    out_df["unit_context"] = out_df["unit_context"].fillna("not_required")
    return ScmDataFrame(out_df)
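# Hedged usage sketch (assumed names): `protocol_variables` is the table read
# from the RCMIP submission template, with "variable" and "unit" columns, and
# `db` is the ScmDataFrame whose units should be converted.
def _example_unify_units(db, protocol_variables):
    unified = unify_units(db, protocol_variables)
    # after conversion, each variable should carry the protocol's unit
    return unified["unit"].unique()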
# %%
quantile = "quantile"
relevant_files = [str(p) for p in relevant_files if quantile not in p]
print("Number of relevant files: {}".format(len(relevant_files)))
relevant_files

# %% [markdown]
# ### Read in all variables:

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
db = []
for rf in tqdm.tqdm_notebook(relevant_files):
    # print(rf.endswith('sf'))
    if rf.endswith(".csv"):
        loaded = ScmDataFrame(rf)
    else:
        loaded = ScmDataFrame(rf, sheet_name="your_data")

    db.append(
        loaded.filter(variable=variables_erf, scenario=scenarios_fl)
    )  # variables_of_interest))

print(db)
db = df_append(db).timeseries().reset_index()
db["unit"] = db["unit"].apply(
    lambda x: x.replace("Dimensionless", "dimensionless") if isinstance(x, str) else x
)
clear_output()
db = ScmDataFrame(db)
db.head()

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
db[variable].unique()