def test_manual_aggregation():
    """Compare a hand-computed mean realization with ``agg("mean")``.

    The first five ensemble members are added together and scaled by 1/5;
    the RMS_SEED parameter of that combination must equal the RMS_SEED of
    the virtual realization returned by ``agg("mean")``.
    """
    # __file__ is missing when this code is pasted into an interactive
    # session; use the current working directory in that case.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    realization_glob = testdir + "/data/testensemble-reek001/realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", realization_glob)
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Reference: let the ensemble aggregate itself into a "mean" realization.
    mean = reekensemble.agg("mean")

    # Manual route: add the members one by one (left to right), then scale.
    member_sum = reekensemble[0]
    for real_index in range(1, 5):
        member_sum = member_sum + reekensemble[real_index]
    manualmean = 1 / 5 * member_sum

    # Both routes must agree on the aggregated parameter value.
    seed_from_agg = mean["parameters"]["RMS_SEED"]
    seed_from_manual = manualmean["parameters"]["RMS_SEED"]
    assert seed_from_agg == seed_from_manual
def test_virtual_observations():
    """Rank realizations against observations built from virtual realizations.

    For each of the p90, mean and p10 aggregations of the ensemble, an
    observation object is loaded from a single summary vector of that
    virtual realization. The ensemble is then matched against it, and the
    realization with the smallest summed "L2" mismatch is recorded as the
    representative one.
    """
    # __file__ is missing when this code is pasted into an interactive
    # session; use the current working directory in that case.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    ensemble = ScratchEnsemble(
        "test", testdir + "/data/testensemble-reek001/realization-*/iter-0/"
    )
    ensemble.load_smry(
        column_keys=["FOPT", "FGPT", "FWPT", "FWCT", "FGOR"], time_index="yearly"
    )

    # The virtual realizations that will act as "observations".
    virtreals = {
        "p90realization": ensemble.agg("p90"),
        "meanrealization": ensemble.agg("mean"),
        "p10realization": ensemble.agg("p10"),
    }

    summaryvector = "FOPT"
    closest_by_name = {}
    for virtrealname, virtreal in six.iteritems(virtreals):
        # Start from an empty observation set and fill it from the
        # virtual realization's summary vector.
        observation = Observations({})
        observation.load_smry(virtreal, summaryvector, time_index="yearly")

        # Sum the mismatch per realization and pick the smallest L2.
        mismatch = observation.mismatch(ensemble)
        summed_per_real = mismatch.groupby("REAL").sum()
        ranked_l2 = summed_per_real["L2"].sort_values()
        closest_by_name[virtrealname] = ranked_l2.index.values[0]

    expected_closest = (
        ("meanrealization", 4),
        ("p90realization", 2),
        ("p10realization", 1),
    )
    for virtrealname, real_index in expected_closest:
        assert closest_by_name[virtrealname] == real_index
def test_ensemble_aggregations(tmpdir):
    """Test aggregations of ensembles into virtual realizations.

    Covers mean, median, min, max and percentile aggregations, dumping of
    the resulting virtual realizations to disk, and the ``keylist`` /
    ``excludekeys`` selection arguments of ``agg()``.

    Args:
        tmpdir: pytest temporary directory fixture; the test changes into
            it before writing virtual realizations to disk.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )
    reekensemble.load_smry(time_index="monthly", column_keys=["F*"])
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    reekensemble.load_scalar("npv.txt", convert_numeric=True)

    stats = {
        "mean": reekensemble.agg("mean"),
        "median": reekensemble.agg("median"),
        "min": reekensemble.agg("min"),
        "max": reekensemble.agg("max"),
        "p10": reekensemble.agg("p10"),  # low estimate
        "p90": reekensemble.agg("p90"),  # high estimate
    }

    # Write into the temporary directory, not into the source tree.
    tmpdir.chdir()
    stats["min"].to_disk("virtreal_min", delete=True)
    stats["max"].to_disk("virtreal_max", delete=True)
    stats["mean"].to_disk("virtreal_mean", delete=True)

    def rms_seed(aggname):
        """Return the aggregated RMS_SEED parameter for one statistic."""
        return stats[aggname]["parameters.txt"]["RMS_SEED"]

    # Ordering of the aggregated statistics for a single parameter.
    assert rms_seed("min") < rms_seed("max")
    assert rms_seed("min") <= rms_seed("p10")
    assert rms_seed("p10") <= rms_seed("median")
    assert rms_seed("median") <= rms_seed("p90")
    assert rms_seed("p90") <= rms_seed("max")
    # The mean must be bracketed by min and max. (The upper bound used to
    # be a repeated min <= max check, leaving mean <= max untested.)
    assert rms_seed("min") <= rms_seed("mean")
    assert rms_seed("mean") <= rms_seed("max")

    assert (
        stats["min"]["unsmry--monthly"]["FOPT"].iloc[-1]
        < stats["max"]["unsmry--monthly"]["FOPT"].iloc[-1]
    )

    # .loc[2] corresponds to FIPNUM=3
    # NOTE(review): the first check uses .iloc[2] and the second .loc[2];
    # presumably these select the same row here - confirm the index.
    assert (
        stats["min"]["simulator_volume_fipnum"].iloc[2]["STOIIP_OIL"]
        < stats["mean"]["simulator_volume_fipnum"].iloc[2]["STOIIP_OIL"]
    )
    assert (
        stats["mean"]["simulator_volume_fipnum"].loc[2]["STOIIP_OIL"]
        < stats["max"]["simulator_volume_fipnum"].loc[2]["STOIIP_OIL"]
    )

    # Aggregation of STATUS also works. Note that min and max
    # works for string columns, so the available data will vary
    # depending on aggregation method
    assert (
        stats["p10"]["STATUS"].iloc[49]["DURATION"]
        < stats["max"]["STATUS"].iloc[49]["DURATION"]
    )
    # job 49 is the Eclipse forward model

    assert "npv.txt" in stats["mean"].keys()
    assert stats["mean"]["npv.txt"] == 3382.5

    # Test agg(excludekeys=..)
    assert "STATUS" not in reekensemble.agg("mean", excludekeys="STATUS").keys()
    assert "STATUS" not in reekensemble.agg("mean", keylist=["parameters.txt"]).keys()

    # Arbitrary percentiles are accepted through the "pNN" notation.
    assert (
        reekensemble.agg("p01")["parameters"]["RMS_SEED"]
        < reekensemble.agg("p99")["parameters"]["RMS_SEED"]
    )

    # Unknown aggregation names are rejected.
    with pytest.raises(ValueError):
        reekensemble.agg("foobar")

    # Check that include/exclude functionality in agg() works,
    # both with a plain string and with a list of keys:
    assert (
        "parameters.txt"
        not in reekensemble.agg("mean", excludekeys="parameters.txt").keys()
    )
    assert (
        "parameters.txt"
        not in reekensemble.agg("mean", excludekeys=["parameters.txt"]).keys()
    )
    assert "parameters.txt" not in reekensemble.agg("mean", keylist="STATUS").keys()
    assert "parameters.txt" not in reekensemble.agg("mean", keylist=["STATUS"]).keys()

    # Shorthand notion works for keys to include, but they
    # should get returned with fully qualified paths.
    assert (
        "share/results/tables/unsmry--yearly.csv"
        in reekensemble.agg("mean", keylist="unsmry--yearly").keys()
    )
    assert (
        "share/results/tables/unsmry--yearly.csv"
        in reekensemble.agg("mean", keylist=["unsmry--yearly"]).keys()
    )
    assert isinstance(
        reekensemble.agg("mean", keylist="unsmry--yearly").get_df("unsmry--yearly"),
        pd.DataFrame,
    )
def test_filter():
    """Exercise ``filter()`` on ensembles.

    Realizations that do not fulfil the tested condition are dropped from
    the ensemble (or from the returned ensemble when ``inplace=False``).
    The statements below are order-dependent, since some filter calls are
    made without ``inplace=False``.
    """
    # __file__ is missing when this code is pasted into an interactive
    # session; use the current working directory in that case.
    if "__file__" not in globals():
        testdir = os.path.abspath(".")
    else:
        testdir = os.path.dirname(os.path.abspath(__file__))

    realization_glob = testdir + "/data/testensemble-reek001/realization-*/iter-0"

    def fresh_ensemble():
        """Build a new, unfiltered ensemble from the test data."""
        return ScratchEnsemble("reektest", realization_glob)

    ens = fresh_ensemble()

    # Only requires a STATUS file to be present in every realization.
    kept = ens.filter("STATUS")
    assert len(kept) == 5

    # String equivalence on numeric data:
    ens.filter("parameters.txt", key="RMS_SEED", value="723121249", inplace=True)
    assert len(ens) == 2
    # (pylint gives a false positive on the subscripting below)
    mean_seed = ens.agg("mean")["parameters"]["RMS_SEED"]
    assert mean_seed == 723121249

    # Numeric equivalence, starting over from a full ensemble:
    ens = fresh_ensemble()
    ens.filter("parameters.txt", key="RMS_SEED", value=723121249, inplace=True)
    assert len(ens) == 2
    mean_seed = ens.agg("mean")["parameters"]["RMS_SEED"]
    assert mean_seed == 723121249

    # Key presence only, without touching the ensemble itself:
    ens = fresh_ensemble()
    kept = ens.filter("parameters.txt", key="FOO", inplace=False)
    assert len(kept) == 2
    # (NaN in one of the parameters.txt is True in this context)

    kept = ens.filter(
        "parameters.txt", key="MULTFLT_F1", value=0.001, inplace=False
    )
    assert len(kept) == 4

    # Numeric and string forms of the value give the same result.
    kept = ens.filter("parameters.txt", key="FWL", value=1700, inplace=False)
    assert len(kept) == 3
    kept = ens.filter("parameters.txt", key="FWL", value="1700", inplace=False)
    assert len(kept) == 3

    # NOT IMPLEMENTED: value='' for key='FOO' should correspond to missing
    # data (and would give 4 realizations). Giving no value at all means
    # that the key must be present:
    kept = ens.filter("parameters.txt", key="FOO", inplace=False)
    assert len(kept) == 2

    # 'key' is not accepted for things that are tables.
    with pytest.raises(ValueError):
        ens.filter("STATUS", key="ECLIPSE")
    with pytest.raises(ValueError):
        ens.filter("STATUS", value="ECLIPSE")

    # Column presence:
    kept = ens.filter("STATUS", column="FORWARD_MODEL")
    assert len(kept) == 5
    kept = ens.filter("STATUS", column="FORWARD_MODEL", inplace=False)
    assert len(kept) == 5
    kept = ens.filter("STATUS", column="FOOBAR", inplace=False)
    assert not kept

    # Unknown keyword arguments are rejected.
    with pytest.raises(ValueError):
        ens.filter("STATUS", wrongarg="FOOBAR", inplace=False)

    # A column must contain a given value:
    kept = ens.filter(
        "STATUS", column="FORWARD_MODEL", columncontains="ECLIPSE100_2014.2"
    )
    assert len(kept) == 5
    kept = ens.filter(
        "STATUS",
        column="FORWARD_MODEL",
        columncontains="ECLIPSE100_2010.2",
        inplace=False,
    )
    assert not kept

    # The same kinds of checks against summary data.
    ens.load_smry()
    kept = ens.filter("unsmry--raw")
    assert len(kept) == 5
    kept = ens.filter("unsmry--raw", column="FOPT")
    assert len(kept) == 5
    kept = ens.filter("unsmry--raw", column="FOOBAR", inplace=False)
    assert not kept

    kept = ens.filter("unsmry--raw", column="FOPT", columncontains=0)
    assert len(kept) == 5
    kept = ens.filter(
        "unsmry--raw", column="FOPT", columncontains=-1000, inplace=False
    )
    assert not kept

    # Float and integer forms of the same value behave alike.
    kept = ens.filter(
        "unsmry--raw", column="FOPT", columncontains=6025523.0, inplace=False
    )
    assert len(kept) == 1
    kept = ens.filter(
        "unsmry--raw", column="FOPT", columncontains=6025523, inplace=False
    )
    assert len(kept) == 1

    # Strings are not (yet) supported for numeric columns, so
    # columncontains='6025523.0' is deliberately not tested here.

    # Dates can be given with or without the time-of-day part.
    kept = ens.filter(
        "unsmry--raw", column="DATE", columncontains="2002-11-25", inplace=False
    )
    assert len(kept) == 5
    kept = ens.filter(
        "unsmry--raw",
        column="DATE",
        columncontains="2002-11-25 00:00:00",
        inplace=False,
    )
    assert len(kept) == 5
    kept = ens.filter(
        "unsmry--raw",
        column="DATE",
        columncontains="2002-11-25 00:00:01",
        inplace=False,
    )
    assert not kept

    kept = ens.filter(
        "unsmry--raw",
        column="DATE",
        columncontains="2000-01-07 02:26:15",
        inplace=False,
    )
    assert len(kept) == 3
    kept = ens.filter(
        "unsmry--raw", column="DATE", columncontains="2000-01-07", inplace=False
    )
    assert not kept