def test_manual_aggregation():
    """Check that a mean built by hand from the individual
    realizations agrees with the mean returned by agg() on the
    ensemble"""
    # Fall back to the working directory when __file__ is undefined,
    # which eases copying test code into interactive sessions
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    ensemble_glob = (testdir + "/data/testensemble-reek001/" +
                     "realization-*/iter-0")
    ens = ScratchEnsemble("reektest", ensemble_glob)
    ens.load_smry(time_index="yearly", column_keys=["F*"])
    ens.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Let the ensemble compute its own "mean" aggregate
    mean = ens.agg("mean")

    # Build the same mean by arithmetic on the five ensemble members:
    # add them up first, then scale by 1/5.
    realization_sum = ens[0] + ens[1] + ens[2] + ens[3] + ens[4]
    manualmean = 1 / 5 * realization_sum

    # Both routes must give the same aggregated RMS_SEED parameter
    agg_seed = mean["parameters"]["RMS_SEED"]
    manual_seed = manualmean["parameters"]["RMS_SEED"]
    assert agg_seed == manual_seed
Exemplo n.º 2
0
def test_get_df_merge():
    """Exercise the merge argument of get_df() on a virtual ensemble"""

    # Fall back to the working directory when __file__ is undefined,
    # which eases copying test code into interactive sessions
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    scratch_ens = ScratchEnsemble("reektest", ensemble_glob)
    scratch_ens.load_smry(time_index="yearly", column_keys=["F*"])
    scratch_ens.load_scalar("npv.txt")
    scratch_ens.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    outputs = scratch_ens.load_txt("outputs.txt")
    virt_ens = scratch_ens.to_virtual()

    params = virt_ens.get_df("parameters.txt")
    smrycount = len(virt_ens.get_df("unsmry--yearly").columns)
    smryparams = virt_ens.get_df("unsmry--yearly", merge="parameters")

    # REAL is present in both tables, so it is only counted once
    # in the merged frame (hence the "minus 1").
    assert len(smryparams.columns) == len(params.columns) + smrycount - 1

    # The merge argument may also be given as a list
    paramsoutputs = virt_ens.get_df("parameters", merge=["outputs"])
    assert len(paramsoutputs.columns) == len(params.columns) + len(outputs.columns) - 1

    # Two merges in one call
    smry_all = virt_ens.get_df("unsmry--yearly", merge=["parameters", "outputs"])
    assert (
        len(smry_all.columns)
        == smrycount + len(params.columns) + len(outputs.columns) - 2
    )

    # Merging scalar data adds exactly one column
    params_npv = virt_ens.get_df("parameters", merge="npv.txt")
    assert len(params_npv.columns) == len(params.columns) + 1

    # Symmetry: same column count when the merge direction is swapped
    npv_params = virt_ens.get_df("npv.txt", merge="parameters.txt")
    assert len(npv_params.columns) == len(params.columns) + 1

    # Merge with zone data; a mocked FIPNUM-to-ZONE table is placed
    # directly in the virtual ensemble's data store:
    zone_rows = [
        [1, "UpperReek"],
        [2, "MidReek"],
        [3, "LowerReek"],
        [4, "UpperReek"],
        [5, "MidReek"],
        [6, "LowerReek"],
    ]
    virt_ens.data["fipnum2zone"] = pd.DataFrame(
        columns=["FIPNUM", "ZONE"], data=zone_rows
    )
    volframe = virt_ens.get_df("simulator_volume_fipnum", merge="fipnum2zone")
    for column in ("ZONE", "FIPNUM", "STOIIP_OIL"):
        assert column in volframe
    assert len(volframe["ZONE"].unique()) == 3
Exemplo n.º 3
0
def load_per_real_csv_file_using_fmu(ens_path: str,
                                     csv_file_rel_path: str) -> pd.DataFrame:
    """Load a per-realization CSV file through a ScratchEnsemble.

    A temporary ScratchEnsemble is created from ``ens_path`` (with
    autodiscovery enabled), and ``csv_file_rel_path`` is loaded from it
    using load_csv(). The elapsed time is written to the debug log.

    Args:
        ens_path: Path (pattern) handed to the ScratchEnsemble constructor.
        csv_file_rel_path: CSV file path handed to load_csv().

    Returns:
        The result of load_csv() for the requested file.
    """
    LOGGER.debug(
        f"load_per_real_csv_file_using_fmu() starting - {csv_file_rel_path}")
    stopwatch = PerfTimer()

    ensemble = ScratchEnsemble("tempEnsName", ens_path, autodiscovery=True)
    csv_frame = ensemble.load_csv(csv_file_rel_path)

    LOGGER.debug(
        f"load_per_real_csv_file_using_fmu() finished in: {stopwatch.elapsed_s():.2f}s"
    )
    return csv_frame
Exemplo n.º 4
0
def test_ensemble_aggregations(tmpdir):
    """Test aggregations of ensembles, that
    is taking means, medians, p10 and so on, producing
    virtual realizations

    Args:
        tmpdir: pytest temporary directory fixture; the test changes
            into it before dumping virtual realizations to disk.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    reekensemble.load_smry(time_index="monthly", column_keys=["F*"])
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    reekensemble.load_scalar("npv.txt", convert_numeric=True)

    stats = {
        "mean": reekensemble.agg("mean"),
        "median": reekensemble.agg("median"),
        "min": reekensemble.agg("min"),
        "max": reekensemble.agg("max"),
        "p10": reekensemble.agg("p10"),  # low estimate
        "p90": reekensemble.agg("p90"),  # high estimate
    }

    # Dump inside the temporary directory so nothing leaks into the
    # source tree
    tmpdir.chdir()
    stats["min"].to_disk("virtreal_min", delete=True)
    stats["max"].to_disk("virtreal_max", delete=True)
    stats["mean"].to_disk("virtreal_mean", delete=True)

    # Aggregated RMS_SEED for each statistic, looked up once
    seed = {
        name: virtreal["parameters.txt"]["RMS_SEED"]
        for name, virtreal in stats.items()
    }

    assert seed["min"] < seed["max"]

    # The percentiles must be ordered between min and max
    assert seed["min"] <= seed["p10"]
    assert seed["p10"] <= seed["median"]
    assert seed["median"] <= seed["p90"]
    assert seed["p90"] <= seed["max"]

    # The mean must be bounded by min and max. (The upper bound used
    # to be a repeated min <= max check, leaving the mean unbounded
    # from above.)
    assert seed["min"] <= seed["mean"]
    assert seed["mean"] <= seed["max"]

    assert (stats["min"]["unsmry--monthly"]["FOPT"].iloc[-1] <
            stats["max"]["unsmry--monthly"]["FOPT"].iloc[-1])

    # Row 2 corresponds to FIPNUM=3; it is checked both positionally
    # (.iloc) and by index label (.loc)
    assert (stats["min"]["simulator_volume_fipnum"].iloc[2]["STOIIP_OIL"] <
            stats["mean"]["simulator_volume_fipnum"].iloc[2]["STOIIP_OIL"])
    assert (stats["mean"]["simulator_volume_fipnum"].loc[2]["STOIIP_OIL"] <
            stats["max"]["simulator_volume_fipnum"].loc[2]["STOIIP_OIL"])

    # Aggregation of STATUS also works. Note that min and max
    # works for string columns, so the available data will vary
    # depending on aggregation method
    assert (stats["p10"]["STATUS"].iloc[49]["DURATION"] <
            stats["max"]["STATUS"].iloc[49]["DURATION"])
    # job 49 is the Eclipse forward model

    assert "npv.txt" in stats["mean"].keys()
    assert stats["mean"]["npv.txt"] == 3382.5

    # Test agg(excludekeys=..)
    assert "STATUS" not in reekensemble.agg("mean",
                                            excludekeys="STATUS").keys()
    assert "STATUS" not in reekensemble.agg("mean",
                                            keylist=["parameters.txt"]).keys()

    assert (reekensemble.agg("p01")["parameters"]["RMS_SEED"] <
            reekensemble.agg("p99")["parameters"]["RMS_SEED"])

    # Unknown aggregation names must be rejected
    with pytest.raises(ValueError):
        reekensemble.agg("foobar")

    # Check that include/exclude functionality in agg() works,
    # both with a plain string and with a list:
    assert ("parameters.txt"
            not in reekensemble.agg("mean",
                                    excludekeys="parameters.txt").keys())
    assert ("parameters.txt"
            not in reekensemble.agg("mean",
                                    excludekeys=["parameters.txt"]).keys())
    assert "parameters.txt" not in reekensemble.agg("mean",
                                                    keylist="STATUS").keys()
    assert "parameters.txt" not in reekensemble.agg("mean",
                                                    keylist=["STATUS"]).keys()

    # Shorthand notion works for keys to include, but they
    # should get returned with fully qualified paths.
    assert ("share/results/tables/unsmry--yearly.csv"
            in reekensemble.agg("mean", keylist="unsmry--yearly").keys())
    assert ("share/results/tables/unsmry--yearly.csv"
            in reekensemble.agg("mean", keylist=["unsmry--yearly"]).keys())
    assert isinstance(
        reekensemble.agg("mean",
                         keylist="unsmry--yearly").get_df("unsmry--yearly"),
        pd.DataFrame,
    )
from fmu.ensemble import ScratchEnsemble

# Gather selected CSV files from each realization
# and dump them (each individually merged with parameters.txt)
# to share/results

ens = ScratchEnsemble("", "realization-*/iter-0")
csv_files = [
    "volumes/geogrid--oil.csv",
    "volumes/simgrid--oil.csv",
    "volumes/simulator_volume_fipnum.csv",
    "tables/rft.csv",
    "tables/unsmry--monthly.csv",
    "tables/equil.csv",
    "tables/relperm.csv",
    "tables/pvt.csv",
]

for csv_relpath in csv_files:
    ens.load_csv("share/results/" + csv_relpath)
    # The component after the first "/" is the key used for lookup
    lookup_key = csv_relpath.split("/")[1]
    merged = ens.get_df(lookup_key, merge="parameters.txt")
    merged.to_csv("share/results/" + csv_relpath, index=False)
Exemplo n.º 6
0
def test_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble

    Args:
        tmp: Directory where CSV dumps of the inspected dataframes are
            written. It is created (including parents) if missing.
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    assert isinstance(reekensemble, ScratchEnsemble)
    assert reekensemble.name == "reektest"
    assert len(reekensemble) == 5

    assert isinstance(reekensemble[0], ScratchRealization)

    # These files are expected once per realization
    assert len(
        reekensemble.files[reekensemble.files.LOCALPATH == "jobs.json"]) == 5
    assert (len(reekensemble.files[reekensemble.files.LOCALPATH ==
                                   "parameters.txt"]) == 5)
    assert len(
        reekensemble.files[reekensemble.files.LOCALPATH == "STATUS"]) == 5

    statusdf = reekensemble.get_df("STATUS")
    assert len(statusdf) == 250  # 5 realizations, 50 jobs in each
    assert "REAL" in statusdf.columns
    assert "FORWARD_MODEL" in statusdf.columns
    statusdf = statusdf.set_index(["REAL", "FORWARD_MODEL"]).sort_index()
    assert "DURATION" in statusdf.columns  # calculated
    assert "argList" in statusdf.columns  # from jobs.json

    # Sample check the duration for RMS in realization 4:
    assert int(statusdf.loc[4, "RMS_BATCH"]["DURATION"].values[0]) == 195

    # STATUS in real4 is modified to simulate that Eclipse never finished:
    assert numpy.isnan(statusdf.loc[4,
                                    "ECLIPSE100_2014.2"]["DURATION"].values[0])

    # makedirs with exist_ok avoids the exists()/mkdir() race and also
    # copes with a nested output directory
    os.makedirs(tmp, exist_ok=True)
    statusdf.to_csv(os.path.join(tmp, "status.csv"), index=False)

    # Parameters.txt
    paramsdf = reekensemble.load_txt("parameters.txt")
    assert len(paramsdf) == 5  # 5 realizations
    paramsdf = reekensemble.parameters  # also test as property
    paramsdf = reekensemble.get_df("parameters.txt")
    assert len(paramsdf) == 5
    assert len(paramsdf.columns) == 26  # 25 parameters, + REAL column
    paramsdf.to_csv(os.path.join(tmp, "params.csv"), index=False)

    # Check that the ensemble object has not tainted the realization dataframe:
    assert "REAL" not in reekensemble._realizations[0].get_df("parameters.txt")

    # The column FOO in parameters is only present in some, and
    # is present with NaN in real0:
    assert "FOO" in reekensemble.parameters.columns
    assert len(reekensemble.parameters["FOO"].dropna()) == 1
    # (NaN in one real, and non-existing in the others is the same thing)

    # Test loading of another txt file:
    reekensemble.load_txt("outputs.txt")
    assert "NPV" in reekensemble.load_txt("outputs.txt").columns
    # Check implicit discovery
    assert "outputs.txt" in reekensemble.files["LOCALPATH"].values
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # File discovery:
    csvvolfiles = reekensemble.find_files("share/results/volumes/*csv",
                                          metadata={"GRID": "simgrid"})
    assert isinstance(csvvolfiles, pd.DataFrame)
    assert "REAL" in csvvolfiles
    assert "FULLPATH" in csvvolfiles
    assert "LOCALPATH" in csvvolfiles
    assert "BASENAME" in csvvolfiles
    # Check the explicit metadata:
    assert "GRID" in csvvolfiles
    # Compare as a list: asserting on the array returned by unique()
    # only behaves when there is exactly one unique value
    assert list(csvvolfiles["GRID"].unique()) == ["simgrid"]

    reekensemble.files.to_csv(os.path.join(tmp, "files.csv"), index=False)

    # Check that rediscovery does not mess things up:

    filecount = len(reekensemble.files)
    newfiles = reekensemble.find_files("share/results/volumes/*csv")
    # Also note that we skipped metadata here in rediscovery:

    assert len(reekensemble.files) == filecount
    assert len(newfiles) == len(csvvolfiles)

    # The last invocation of find_files() should not return the metadata
    assert len(newfiles.columns) + 1 == len(csvvolfiles.columns)

    # FULLPATH should always contain absolute paths
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # The metadata in the rediscovered files should have been removed
    assert len(
        reekensemble.files[reekensemble.files["GRID"] == "simgrid"]) == 0

    # CSV files
    csvpath = "share/results/volumes/simulator_volume_fipnum.csv"
    vol_df = reekensemble.load_csv(csvpath)

    # Check that we have not tainted the realization dataframes:
    assert "REAL" not in reekensemble._realizations[0].get_df(csvpath)

    assert "REAL" in vol_df
    assert len(vol_df["REAL"].unique()) == 3  # missing in 2 reals
    vol_df.to_csv(os.path.join(tmp, "simulatorvolumes.csv"), index=False)

    # Test retrieval of cached data
    vol_df2 = reekensemble.get_df(csvpath)

    assert "REAL" in vol_df2
    assert len(vol_df2["REAL"].unique()) == 3  # missing in 2 reals

    # Realization deletion:
    reekensemble.remove_realizations([1, 3])
    assert len(reekensemble) == 3

    # Re-add the same realizations
    reekensemble.add_realizations([
        testdir + "/data/testensemble-reek001/" + "realization-1/iter-0",
        testdir + "/data/testensemble-reek001/" + "realization-3/iter-0",
    ])
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24

    # File discovery must be repeated for the newly added realizations
    reekensemble.find_files(
        "share/results/volumes/" + "simulator_volume_fipnum.csv",
        metadata={"GRID": "simgrid"},
    )
    assert len(reekensemble.files) == 25
    # Test addition of already added realization:
    reekensemble.add_realizations(testdir + "/data/testensemble-reek001/" +
                                  "realization-1/iter-0")
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24  # discovered files are lost!

    # Removing a dataset must drop exactly one key
    keycount = len(reekensemble.keys())
    reekensemble.remove_data("parameters.txt")
    assert len(reekensemble.keys()) == keycount - 1
Exemplo n.º 7
0
def test_get_df():
    """Test the data retrieval functionality

    get_df() in the ensemble context is an aggregator, that will aggregate
    data from individual realizations to the ensemble level, with
    optional merging capabilities performed on realization level."""
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ens = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0")
    smry = ens.load_smry(column_keys="FO*", time_index="yearly")

    # The same data must be reachable through short and fully
    # qualified names, with or without the .csv suffix:
    assert not ens.get_df("unsmry--yearly").empty
    assert not ens.get_df("unsmry--yearly.csv").empty
    assert not ens.get_df("share/results/tables/unsmry--yearly").empty
    assert not ens.get_df("share/results/tables/unsmry--yearly.csv").empty
    with pytest.raises(KeyError):
        # Only yearly summary data has been loaded at this point
        ens.get_df("unsmry--monthly")
    ens.load_smry(column_keys="FO*", time_index="monthly")
    assert not ens.get_df("unsmry--monthly").empty
    with pytest.raises(KeyError):
        # Name with a single dash must not match anything
        ens.get_df("unsmry-monthly")

    # Tests that we can do merges directly:
    params = ens.get_df("parameters.txt")
    smryparams = ens.get_df("unsmry--yearly", merge="parameters")
    # The set union is to handle the REAL column present in both smry and params:
    assert len(smryparams.columns) == len(
        set(smry.columns).union(params.columns))

    # Test multiple merges:
    outputs = ens.load_txt("outputs.txt")
    assert len(
        ens.get_df("unsmry--yearly",
                   merge=["parameters", "outputs.txt"]).columns) == len(
                       set(smry.columns).union(params.columns).union(
                           outputs.columns))

    # Try merging dataframes:
    ens.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Inject a mocked dataframe to the realization, there is
    # no "add_data" API for ensembles, but we can use the apply()
    # functionality
    def fipnum2zone():
        """Helper function for injecting mocked frame into
        each realization"""
        return pd.DataFrame(
            columns=["FIPNUM", "ZONE"],
            data=[
                [1, "UpperReek"],
                [2, "MidReek"],
                [3, "LowerReek"],
                [4, "UpperReek"],
                [5, "MidReek"],
                [6, "LowerReek"],
            ],
        )

    ens.apply(fipnum2zone, localpath="fipnum2zone")
    volframe = ens.get_df("simulator_volume_fipnum", merge="fipnum2zone")

    assert "ZONE" in volframe
    assert "FIPNUM" in volframe
    assert "STOIIP_OIL" in volframe
    assert len(volframe["ZONE"].unique()) == 3

    # Merge with scalar data:
    ens.load_scalar("npv.txt")
    vol_npv = ens.get_df("simulator_volume_fipnum", merge="npv.txt")
    # (this particular data combination does not really make sense)
    assert "STOIIP_OIL" in vol_npv
    assert "npv.txt" in vol_npv