示例#1
0
def test_ens_mismatch():
    """Check the mismatch frame computed from observations to an ensemble.

    One smryh observation unit (FOPT against FOPTH) is compared to the
    test ensemble. The result must carry the expected columns and one row
    per realization. An observation referring to non-existing vectors must
    produce an empty frame.
    """
    # Fall back to the working directory when the code is pasted into an
    # interactive session, where __file__ is not defined.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ensemble_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    )
    ens = ScratchEnsemble("test", ensemble_glob)

    fopt_obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})
    mismatch = fopt_obs.mismatch(ens)

    for column in ("L1", "L2", "MISMATCH", "OBSKEY", "OBSTYPE", "REAL"):
        assert column in mismatch.columns
    # A single observation unit gives one row per realization.
    assert len(mismatch) == len(ens) * 1

    # Rank realizations by their L2 distance to the observation.
    ranked_reals = mismatch.sort_values("L2", ascending=True)["REAL"].values
    assert ranked_reals[0] == 2  # closest realization
    assert ranked_reals[-1] == 1  # worst realization

    # Observations pointing at vectors that do not exist give no rows.
    missing_obs = Observations(
        {"smryh": [{"key": "FOPTFLUFF", "histvec": "FOPTFLUFFH"}]}
    )
    mismatch = missing_obs.mismatch(ens)
    assert mismatch.empty
示例#2
0
def test_smry():
    """Check mismatch calculation for smry observations loaded from yaml.

    smry observations relate to summary data, but the observed values are
    given explicitly in the yaml file instead of through *H summary
    vectors.
    """
    # Fall back to the working directory when the code is pasted into an
    # interactive session, where __file__ is not defined.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ensemble_root = testdir + "/data/testensemble-reek001/"

    obs_path = ensemble_root + "/share/observations/" + "observations.yml"
    obs = Observations(obs_path)
    real = ScratchRealization(ensemble_root + "realization-0/iter-0/")

    # Mismatch from the loaded observation set to this single realization.
    mismatch = obs.mismatch(real)

    # later: implement counting in the obs object
    assert len(mismatch) == 21
    for norm in (mismatch.L1, mismatch.L2):
        assert norm.sum() > 0
示例#3
0
def test_ens_failedreals():
    """Check mismatch calculation when some realizations lack UNSMRY data.

    The ensemble is loaded without autodiscovery, so no UNSMRY files are
    known until find_files() is called. Afterwards the UNSMRY path of one
    realization is redirected to a bogus location, and that realization
    must drop out of the mismatch frame.
    """
    # Fall back to the working directory when the code is pasted into an
    # interactive session, where __file__ is not defined.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ensemble_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    )

    def fresh_ensemble():
        """Load the test ensemble without discovering any files."""
        return ScratchEnsemble("test", ensemble_glob, autodiscovery=False)

    ens = fresh_ensemble()
    obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})

    # No UNSMRY files have been found yet, so there is nothing to compare.
    first_mismatch = obs.mismatch(ens)
    assert first_mismatch.empty

    ens.find_files("eclipse/model/*UNSMRY")
    assert not obs.mismatch(ens).empty

    # Start over with a fresh ensemble object.
    ens = fresh_ensemble()
    ens.find_files("eclipse/model/*UNSMRY")

    # Point the UNSMRY entry of realization 3 to a path that does not exist.
    files_real3 = ens[3].files
    is_unsmry = files_real3["FILETYPE"] == "UNSMRY"
    files_real3.loc[is_unsmry, "FULLPATH"] = "FOO"

    # EclSum must be available for realization 2, but not for 3.
    assert ens[2].get_eclsum()
    assert not ens[3].get_eclsum()

    # Realization 3 must be absent from the mismatch, while the other
    # realizations still contribute rows.
    missingsmry = obs.mismatch(ens)
    assert 3 not in list(missingsmry["REAL"])
    assert not obs.mismatch(ens).empty
示例#4
0
def test_virtual_observations():
    """Rank realizations by similarity to aggregated (virtual) realizations.

    For each of the p90, mean and p10 aggregates of the ensemble, an
    observation object is built from the aggregate's FOPT vector, and the
    realization with the smallest summed L2 mismatch is picked as its
    representative.
    """
    # Fall back to the working directory when the code is pasted into an
    # interactive session, where __file__ is not defined.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ensemble_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    )
    ens = ScratchEnsemble("test", ensemble_glob)
    ens.load_smry(
        column_keys=["FOPT", "FGPT", "FWPT", "FWCT", "FGOR"],
        time_index="yearly",
    )

    # Aggregated realizations that serve as the observation sources.
    virtreals = {
        "p90realization": ens.agg("p90"),
        "meanrealization": ens.agg("mean"),
        "p10realization": ens.agg("p10"),
    }

    summaryvector = "FOPT"
    representative_realizations = {}
    for name, virtreal in six.iteritems(virtreals):
        # Start from an empty observation object and fill it from the
        # aggregated summary vector.
        obs = Observations({})
        obs.load_smry(virtreal, summaryvector, time_index="yearly")

        # Distance from every realization to this observation set,
        # summed per realization.
        mismatch = obs.mismatch(ens)
        l2_per_real = mismatch.groupby("REAL").sum()["L2"]
        ranked_reals = l2_per_real.sort_values().index.values
        representative_realizations[name] = ranked_reals[0]

    expected = (
        ("meanrealization", 4),
        ("p90realization", 2),
        ("p10realization", 1),
    )
    for name, real_index in expected:
        assert representative_realizations[name] == real_index
示例#5
0
def test_real_mismatch():
    """Test calculation of mismatch from an observation set to a realization.

    Covers txt and scalar observations, the layout of the returned frame,
    a yaml export/import round trip of the observation object, smryh
    observations, and several observation sets that refer to data that
    does not exist (all of which must give an empty mismatch frame).
    """
    if "__file__" in globals():
        # Ease copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    real = ScratchRealization(testdir + "/data/testensemble-reek001/" +
                              "realization-0/iter-0/")

    real.load_smry()
    real.load_txt("outputs.txt")
    real.load_scalar("npv.txt")

    obs = Observations({
        "txt": [{
            "localpath": "parameters.txt",
            "key": "FWL",
            "value": 1702
        }]
    })
    realmis = obs.mismatch(real)

    # Check layout of returned data
    assert isinstance(realmis, pd.DataFrame)
    assert len(realmis) == 1
    assert "REAL" not in realmis.columns  # should only be there for ensembles.
    assert "OBSTYPE" in realmis.columns
    assert "OBSKEY" in realmis.columns
    assert "DATE" not in realmis.columns  # date is not relevant
    assert "MISMATCH" in realmis.columns
    assert "L1" in realmis.columns
    assert "L2" in realmis.columns

    # Check actually computed values, there should only be one row with data:
    assert realmis.loc[0, "OBSTYPE"] == "txt"
    assert realmis.loc[0, "OBSKEY"] == "parameters.txt/FWL"
    assert realmis.loc[0, "MISMATCH"] == -2
    assert realmis.loc[0, "SIGN"] == -1
    assert realmis.loc[0, "L1"] == 2
    assert realmis.loc[0, "L2"] == 4

    # Another observation set:
    obs2 = Observations({
        "txt": [
            {
                "localpath": "parameters.txt",
                "key": "RMS_SEED",
                "value": 600000000
            },
            {
                "localpath": "outputs.txt",
                "key": "top_structure",
                "value": 3200
            },
        ],
        "scalar": [{
            "key": "npv.txt",
            "value": 3400
        }],
    })
    realmis2 = obs2.mismatch(real)
    assert len(realmis2) == 3
    assert "parameters.txt/RMS_SEED" in realmis2["OBSKEY"].values
    assert "outputs.txt/top_structure" in realmis2["OBSKEY"].values
    assert "npv.txt" in realmis2["OBSKEY"].values

    # assert much more!

    # Test that we can write the observations to yaml
    # and verify that the exported yaml can be reimported
    # and yield the same result
    obs2r = Observations(yaml.full_load(obs2.to_yaml()))
    realmis2r = obs2r.mismatch(real)
    # ndarray.sort() sorts in place and returns None, so comparing its
    # return values would always be true. np.sort() returns sorted copies
    # that can actually be compared, and leaves the frames untouched.
    mismatch_original = np.sort(realmis2["MISMATCH"].values)
    mismatch_reimported = np.sort(realmis2r["MISMATCH"].values)
    assert len(mismatch_original) == len(mismatch_reimported)
    assert np.allclose(mismatch_original, mismatch_reimported)

    # Test use of allocated values:
    obs3 = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})
    fopt_mis = obs3.mismatch(real)
    assert fopt_mis.loc[0, "OBSTYPE"] == "smryh"
    assert fopt_mis.loc[0, "OBSKEY"] == "FOPT"
    assert fopt_mis.loc[0, "L1"] > 0
    assert fopt_mis.loc[0, "L1"] != fopt_mis.loc[0, "L2"]

    # Test mismatch where some data is missing:
    obs4 = Observations({"smryh": [{"key": "FOOBAR", "histvec": "FOOBARH"}]})
    mis_mis = obs4.mismatch(real)
    assert mis_mis.empty

    # This test fails, the consistency check is not implemented.
    # obs_bogus = Observations({'smryh': [{'keddy': 'FOOBAR',
    #                               'histdddvec': 'FOOBARH'}]})
    # mis_mis = obs_bogus.mismatch(real)
    # assert mis_mis.empty

    obs_bogus_scalar = Observations(
        {"scalar": [{
            "key": "nonexistingnpv.txt",
            "value": 3400
        }]})
    # (a warning should be logged)
    assert obs_bogus_scalar.mismatch(real).empty

    # Non-existing txt file:
    obs_bogus_param = Observations({
        "txt": [{
            "localpath": "bogusparameters.txt",
            "key": "RMS_SEED",
            "value": 600000000,
        }]
    })
    # (a warning should be logged)
    assert obs_bogus_param.mismatch(real).empty

    # Existing txt file, but non-existing key:
    obs_bogus_param = Observations({
        "txt": [{
            "localpath": "parameters.txt",
            "key": "RMS_SEEEEEEED",
            "value": 600000000,
        }]
    })
    # (a warning should be logged)
    assert obs_bogus_param.mismatch(real).empty

    # Non-existing summary key:
    obs_bogus_smry = Observations({
        "smry": [{
            "key":
            "WBP4:OP_XXXXX",
            "observations": [{
                "date": datetime.date(2001, 1, 1),
                "error": 4,
                "value": 251
            }],
        }]
    })
    assert obs_bogus_smry.mismatch(real).empty
示例#6
0
def test_ensset_mismatch():
    """Test mismatch calculation on an EnsembleSet.

    An iter-1 symlink pointing to iter-0 is created in every realization
    directory, giving two identical ensembles that are combined into an
    EnsembleSet. Mismatch is then computed for smryh and smry
    observations, including different ways of specifying observation
    dates.
    """
    if "__file__" in globals():
        # Ease copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensdir = os.path.join(testdir, "data/testensemble-reek001/")

    # Symlink iter-0 as iter-1, creating an identical ensemble
    # we can load for testing.
    for realizationdir in glob.glob(ensdir + "/realization-*"):
        # lexists() does not follow symlinks, so a dangling iter-1 link
        # left over from an earlier run is also cleaned up. exists() would
        # report False for it, and os.symlink() below would then fail.
        if os.path.lexists(realizationdir + "/iter-1"):
            os.remove(realizationdir + "/iter-1")
        os.symlink(realizationdir + "/iter-0", realizationdir + "/iter-1")

    iter0 = ScratchEnsemble("iter-0", ensdir + "/realization-*/iter-0")
    iter1 = ScratchEnsemble("iter-1", ensdir + "/realization-*/iter-1")

    ensset = EnsembleSet("reek001", [iter0, iter1])

    obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})

    mismatch = obs.mismatch(ensset)
    assert "ENSEMBLE" in mismatch.columns
    assert "REAL" in mismatch.columns
    assert len(mismatch) == 10
    # The two ensembles are identical, so their summed L1 must agree.
    assert (mismatch[mismatch.ENSEMBLE == "iter-0"].L1.sum() == mismatch[
        mismatch.ENSEMBLE == "iter-1"].L1.sum())

    # This is quite hard to input in dict-format. Better via YAML..
    obs_pr = Observations({
        "smry": [{
            "key":
            "WBP4:OP_1",
            "comment":
            "Pressure observations well OP_1",
            "observations": [{
                "value": 250,
                "error": 1,
                "date": datetime.date(2001, 1, 1)
            }],
        }]
    })

    mis_pr = obs_pr.mismatch(ensset)
    assert len(mis_pr) == 10

    # We should also be able to input dates as strings, and they
    # should be attempted parsed to datetime.date:
    obs_pr = Observations({
        "smry": [{
            "key":
            "WBP4:OP_1",
            "observations": [{
                "value": 250,
                "error": 1,
                "date": "2001-01-01"
            }],
        }]
    })
    mis_pr2 = obs_pr.mismatch(ensset)
    assert len(mis_pr2) == 10

    # We are strict and DO NOT ALLOW non-ISO dates like this:
    with pytest.raises(ValueError):
        obs_pr = Observations({
            "smry": [{
                "key":
                "WBP4:OP_1",
                "observations": [{
                    "value": 250,
                    "error": 1,
                    "date": "01-01-2001"
                }],
            }]
        })

    # Erroneous date will raise Exception
    # (but a valid date will give an extrapolated value)
    with pytest.raises(ValueError):
        obs_pr = Observations({
            "smry": [{
                "key":
                "WBP4:OP_1",
                "observations": [{
                    "value": 250,
                    "error": 1,
                    "date": "3011-45-443"
                }],
            }]
        })
    obs_extrap = Observations({
        "smry": [{
            "key":
            "WBP4:OP_1",
            "observations": [{
                "value": 250,
                "error": 1,
                "date": "1977-01-01"
            }],
        }]
    })
    assert len(obs_extrap.mismatch(ensset)) == 10  # (5 reals, 2 ensembles)
示例#7
0
def test_vens_mismatch():
    """Check mismatch calculation against a virtualized ensemble.

    The ensemble is loaded with monthly FOPT* summary data and then
    virtualized. Mismatch computed on the virtual ensemble is compared to
    mismatch computed on the original ensemble for different time indices.
    """
    # Fall back to the working directory when the code is pasted into an
    # interactive session, where __file__ is not defined.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ensemble_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    )

    def fopt_smryh(**extra):
        """Build a FOPT/FOPTH smryh observation with optional extra keys."""
        unit = {"key": "FOPT", "histvec": "FOPTH"}
        unit.update(extra)
        return Observations({"smryh": [unit]})

    ens = ScratchEnsemble("test", ensemble_glob)
    ens.load_smry(column_keys=["FOPT*"], time_index="monthly")
    vens = ens.to_virtual()

    # No time_index is given here; the virtual ensemble only holds the
    # monthly data.
    obs = fopt_smryh()

    mismatch = obs.mismatch(vens)
    mismatch_raw = obs.mismatch(ens)
    assert isinstance(mismatch, pd.DataFrame)
    assert not mismatch.empty
    for column in ("L1", "L2", "MISMATCH"):
        assert column in mismatch.columns

    assert mismatch["MISMATCH"].sum() != mismatch_raw["MISMATCH"].sum()

    # Asking explicitly for monthly data on the original ensemble must
    # reproduce the result from the virtual ensemble.
    obs_monthly = fopt_smryh(time_index="monthly")
    virtual_sorted = mismatch.sort_values("REAL").reset_index(drop=True)
    monthly_sorted = (
        obs_monthly.mismatch(ens).sort_values("REAL").reset_index(drop=True)
    )
    assert (virtual_sorted == monthly_sorted).all().all()

    # Yearly smryh comparisons should be possible from virtualized
    # monthly profiles.
    obs_yearly = fopt_smryh(time_index="yearly")
    mismatch_yearly = obs_yearly.mismatch(vens)
    assert mismatch_yearly["MISMATCH"].sum() != mismatch["MISMATCH"].sum()

    # Virtualizing without calling load_smry() first:
    vens = ScratchEnsemble("test", ensemble_glob).to_virtual()
    with pytest.raises(ValueError):
        obs.mismatch(vens)

    # Remove one realization from the virtualized ensemble.
    ens = ScratchEnsemble("test", ensemble_glob)
    ens.load_smry(column_keys=["FOPT*"], time_index="monthly")
    vens = ens.to_virtual()
    vens.remove_realizations(2)
    mismatch_subset = obs.mismatch(vens)
    assert 2 not in mismatch_subset["REAL"].unique()
    assert 0 in mismatch_subset["REAL"].unique()
示例#8
0
def test_smryh():
    """Test that smryh mismatch calculation will respect time index.

    The same FOPT/FOPTH observation is set up with different time_index
    values, and the resulting mismatch frames are compared. Invalid
    time_index values must raise when the mismatch is computed.
    """
    if "__file__" in globals():
        # Ease copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ens = ScratchEnsemble(
        "test",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/")

    def smryh_obs(time_index):
        """Build a FOPT/FOPTH smryh observation for the given time_index."""
        return Observations({
            "smryh": [{
                "key": "FOPT",
                "histvec": "FOPTH",
                "time_index": time_index
            }]
        })

    obs_yearly = smryh_obs("yearly")
    obs_raw = smryh_obs("raw")
    obs_monthly = smryh_obs("monthly")
    obs_daily = smryh_obs("daily")
    obs_last = smryh_obs("last")
    obs_error = smryh_obs("ølasjkdf")
    obs_error2 = smryh_obs(4.43)

    mismatchyearly = obs_yearly.mismatch(ens)
    mismatchmonthly = obs_monthly.mismatch(ens)
    mismatchdaily = obs_daily.mismatch(ens)
    mismatchlast = obs_last.mismatch(ens)
    mismatchraw = obs_raw.mismatch(ens)

    # When only one datapoint is included, these should be identical:
    assert (mismatchlast["L1"] == mismatchlast["L2"]).all()
    assert (mismatchlast["L1"] == mismatchlast["MISMATCH"].abs()).all()

    # Check that we have indeed calculated things differently between the
    # time indices. sum() must be called on both sides; comparing the
    # bound method itself to a number is always unequal.
    assert mismatchyearly["L2"].sum() != mismatchmonthly["L2"].sum()
    assert mismatchdaily["L2"].sum() != mismatchraw["L2"].sum()

    with pytest.raises(ValueError):
        obs_error.mismatch(ens)
    with pytest.raises(TypeError):
        # Improve here, this should give ValueError instead
        obs_error2.mismatch(ens)

    print(mismatchlast)
    print(mismatchdaily)
    print(obs_raw.mismatch(ens))
示例#9
0
def test_smryh():
    """Test that smryh mismatch calculation will respect time index.

    The same FOPT/FOPTH observation is set up with different time_index
    values: named frequencies, "raw", "last", and specific dates given as
    ISO strings or as datetime objects. Observation objects built with an
    invalid time_index must evaluate as false.
    """
    if "__file__" in globals():
        # Ease copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ens = ScratchEnsemble(
        "test",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/")

    def smryh_obs(time_index):
        """Build a FOPT/FOPTH smryh observation for the given time_index."""
        return Observations({
            "smryh": [{
                "key": "FOPT",
                "histvec": "FOPTH",
                "time_index": time_index
            }]
        })

    obs_yearly = smryh_obs("yearly")
    obs_raw = smryh_obs("raw")
    obs_monthly = smryh_obs("monthly")
    obs_daily = smryh_obs("daily")
    obs_last = smryh_obs("last")
    obs_isodatestr = smryh_obs("2003-02-01")
    obs_future = smryh_obs("3003-02-01")
    obs_past = smryh_obs("1003-02-01")

    assert obs_isodatestr
    obs_isodate = smryh_obs(dateutil.parser.isoparse("2003-02-01"))
    assert obs_isodate

    # Invalid time_index values must give a falsy observation object.
    obs_error = smryh_obs("ølasjkdf")
    assert not obs_error
    obs_error2 = smryh_obs(4.43)
    assert not obs_error2

    mismatchyearly = obs_yearly.mismatch(ens)
    mismatchmonthly = obs_monthly.mismatch(ens)
    mismatchdaily = obs_daily.mismatch(ens)
    mismatchlast = obs_last.mismatch(ens)
    mismatchraw = obs_raw.mismatch(ens)
    assert mismatchraw["TIME_INDEX"].unique() == ["raw"]

    mismatchdate = obs_isodate.mismatch(ens)
    assert "2003-02-01" in mismatchdate["TIME_INDEX"].unique()[0]

    mismatchdatestr = obs_isodatestr.mismatch(ens)
    # There might be a clock time included
    assert "2003-02-01" in mismatchdatestr["TIME_INDEX"].unique()[0]
    assert all(mismatchdate["L1"] == mismatchdatestr["L1"])

    # The far-future date is expected to give the same L1 as "last".
    mismatchfuture = obs_future.mismatch(ens)
    assert all(mismatchfuture["L1"] == mismatchlast["L1"])

    # The far-past date is expected to give (close to) zero mismatch.
    mismatchpast = obs_past.mismatch(ens)
    assert np.isclose(sum(mismatchpast["L2"]), 0.0)

    # When only one datapoint is included, these should be identical:
    assert (mismatchlast["L1"] == mismatchlast["L2"]).all()
    assert (mismatchlast["L1"] == mismatchlast["MISMATCH"].abs()).all()

    # Check that we have indeed calculated things differently between the
    # time indices. sum() must be called on both sides; comparing the
    # bound method itself to a number is always unequal.
    assert mismatchyearly["L2"].sum() != mismatchmonthly["L2"].sum()
    assert mismatchdaily["L2"].sum() != mismatchraw["L2"].sum()