def test_eclsumcaching():
    """Test caching of eclsum objects on the realizations.

    Each summary-reading method of the ensemble is called first with its
    default arguments and then with ``cache_eclsum=False``. After every
    call the ``_eclsum`` attribute of all realizations is inspected.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    dirs = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    ens = ScratchEnsemble("reektest", dirs)

    # all() and "not any()" are both trivially true on an empty collection,
    # so without this guard every assertion below would pass even when no
    # realization was found on disk.
    assert len(ens) > 0

    def all_cached():
        """True when every realization holds a truthy _eclsum"""
        return all(real._eclsum for real in ens.realizations.values())

    def none_cached():
        """True when no realization holds a truthy _eclsum"""
        return not any(real._eclsum for real in ens.realizations.values())

    # The problem here is if you load in a lot of UNSMRY files
    # and the Python process keeps them in memory. Not sure
    # how to check in code that an object has been garbage collected
    # but for garbage collection to work, at least the realization
    # _eclsum variable must be None.

    ens.load_smry()
    # Default is to do caching, so these will not be None:
    assert all_cached()

    # If we redo this operation, the same objects should all
    # be None afterwards:
    ens.load_smry(cache_eclsum=False)
    # cache_eclsum==None is from v1.1.5 no longer equivalent to False
    assert none_cached()

    ens.get_smry()
    assert all_cached()

    ens.get_smry(cache_eclsum=False)
    assert none_cached()

    ens.get_smry_stats()
    assert all_cached()

    ens.get_smry_stats(cache_eclsum=False)
    assert none_cached()

    ens.get_smry_dates()
    assert all_cached()

    # Clear the cached objects because the statement above has cached it..
    for realization in ens.realizations.values():
        realization._eclsum = None

    ens.get_smry_dates(cache_eclsum=False)
    assert none_cached()
def test_volumetric_rates():
    """Check volumetric rates against the cumulative vectors.

    For each realization, the yearly FOPR values returned by
    ``get_volumetric_rates()`` must sum up to the last yearly FOPT value
    returned by ``get_smry()``.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        here = os.path.dirname(os.path.abspath(__file__))
    else:
        here = os.path.abspath(".")

    ens = ScratchEnsemble(
        "reektest", here + "/data/testensemble-reek001/realization-*/iter-0"
    )
    cumulatives = ens.get_smry(column_keys=["F*T", "W*T*"], time_index="yearly")
    rates = ens.get_volumetric_rates(
        column_keys=["F*T", "W*T*"], time_index="yearly"
    )

    for expected_column in ("DATE", "FOPR", "FWPR"):
        assert expected_column in rates
    assert "FWCR" not in rates

    # Test each realization individually
    rates_by_real = rates.set_index("REAL")
    cumulatives_by_real = cumulatives.set_index("REAL")
    for real in rates["REAL"].unique():
        real_rates = rates_by_real.loc[real]
        real_cumulatives = cumulatives_by_real.loc[real]
        assert len(real_rates) == 5
        assert real_rates["FOPR"].sum() == real_cumulatives["FOPT"].iloc[-1]
def test_smry_via_ecl2df():
    """Test that we could use ecl2df for smry extraction
    instead of the native code inside fmu-ensemble

    The test is skipped when HAVE_ECL2DF is false.
    """
    # Skip before touching any test data, so that a missing ecl2df never
    # costs an ensemble load (or turns into an unrelated failure).
    if not HAVE_ECL2DF:
        pytest.skip("ecl2df is not available")

    def get_smry(kwargs):
        """Callback function to extract smry data using ecl2df on a
        ScratchRealization"""
        eclfiles = kwargs["realization"].get_eclfiles()
        return ecl2df.summary.df(
            eclfiles,
            time_index=kwargs["time_index"],
            column_keys=kwargs["column_keys"],
        )

    if "__file__" in globals():
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekens = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    callback_smry = reekens.apply(get_smry, column_keys="FOPT", time_index="yearly")
    direct_smry = reekens.get_smry(column_keys="FOPT", time_index="yearly")

    # Both routes must give the same totals, both for the vector itself
    # and for the realization indices attached to the rows.
    assert callback_smry["FOPT"].sum() == direct_smry["FOPT"].sum()
    assert callback_smry["REAL"].sum() == direct_smry["REAL"].sum()
def test_emptyens():
    """Check that an ensemble without realizations can be created and queried.

    Every summary-related getter must hand back an empty object of the
    expected type, and a realization can be added manually afterwards.
    """
    ens = ScratchEnsemble("emptyens")
    assert not ens

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    smry = ens.get_smry()
    assert isinstance(smry, pd.DataFrame)
    assert smry.empty

    smry_dates = ens.get_smry_dates()
    assert isinstance(smry_dates, list)
    assert not smry_dates

    smry_keys = ens.get_smrykeys()
    assert isinstance(smry_keys, list)
    assert not smry_keys

    rates = ens.get_volumetric_rates()
    assert isinstance(rates, pd.DataFrame)
    assert rates.empty

    stats = ens.get_smry_stats()
    assert isinstance(stats, pd.DataFrame)
    assert stats.empty

    wells = ens.get_wellnames()
    assert isinstance(wells, list)
    assert not wells

    groups = ens.get_groupnames()
    assert isinstance(groups, list)
    assert not groups

    # Metadata: no argument, wildcard, single key and list of keys
    for meta_args in ((), ("*",), ("FOPT",), (["FOPT"],)):
        meta = ens.get_smry_meta(*meta_args)
        assert isinstance(meta, dict)
        assert not meta

    # Add a realization manually:
    ens.add_realizations(testdir + "/data/testensemble-reek001/realization-0/iter-0")
    assert len(ens) == 1
def test_get_smry_interpolation():
    """Test the summary resampling code for virtual ensembles

    A virtual ensemble holding only yearly summary data is resampled to
    monthly and daily frequency, and the monthly result is compared with
    monthly data obtained directly from the scratch ensemble.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_scalar("npv.txt")
    vens_yearly = reekensemble.to_virtual()
    reekensemble.load_smry(time_index="monthly", column_keys=["F*"])
    # Create a vens that contains both monthly and yearly:
    vens_monthly = reekensemble.to_virtual()
    assert "npv.txt" in vens_monthly.keys()
    reekensemble.load_smry(time_index="daily", column_keys=["F*"])
    _ = reekensemble.to_virtual()  # monthly, yearly *and* daily

    # Resample yearly to monthly:
    monthly = vens_yearly.get_smry(column_keys="FOPT", time_index="monthly")
    assert "FOPT" in monthly.columns
    assert "REAL" in monthly.columns
    assert "DATE" in monthly.columns
    assert len(monthly["REAL"].unique()) == 5

    # 12 months pr. year, including final 1. jan, four years, 5 realizations:
    assert len(monthly) == (12 * 4 + 1) * 5

    # The reference data and the REAL-indexed frames do not depend on the
    # realization, so they are computed once instead of per loop iteration.
    interpolated_by_real = monthly.set_index("REAL")
    true_by_real = reekensemble.get_smry(
        column_keys="FOPT", time_index="monthly"
    ).set_index("REAL")

    for realidx in monthly["REAL"].unique():
        int_m = interpolated_by_real.loc[realidx].set_index("DATE")
        true_m = true_by_real.loc[realidx].set_index("DATE")
        difference = int_m["FOPT"] - true_m["FOPT"]

        # The interpolation error should be zero at each 1st of January
        # but most likely nonzero elsewhere (at least for these realization).
        # abs() is required on the January checks, otherwise a large
        # negative error would slip through the "< 0.0001" comparison.
        assert abs(difference.loc["2001-01-01"]) < 0.0001
        assert abs(difference.loc["2001-06-01"]) > 0
        assert abs(difference.loc["2002-01-01"]) < 0.0001
        assert abs(difference.loc["2002-06-01"]) > 0
        assert abs(difference.loc["2003-01-01"]) < 0.0001

    daily = vens_yearly.get_smry(column_keys=["FOPT", "FOPR"], time_index="daily")
    assert "FOPT" in daily.columns
    assert "REAL" in daily.columns
    assert "DATE" in daily.columns
    assert len(daily["REAL"].unique()) == 5
    assert len(daily) == (365 * 4 + 2) * 5  # 2003-01-01 and 2003-01-02 at end

    # Linear interpolation will give almost unique values everywhere:
    assert len(daily["FOPT"].unique()) > (365 * 4) * 5
    # While bfill for rates cannot be more unique than the yearly input
    assert len(daily["FOPR"].unique()) < 4 * 5  # Must be less than the numbers
def test_noautodiscovery():
    """Verify that auto-discovery of UNSMRY files can be switched off.

    With ``autodiscovery=False`` no summary data is available until
    ``find_files()`` is called explicitly, while parameters.txt and STATUS
    are present regardless and can be removed on request.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    realization_glob = testdir + "/data/testensemble-reek001/realization-*/iter-0"

    # Default ensemble construction will include auto-discovery, check
    # that we got that:
    discovered = ScratchEnsemble("reektest", realization_glob)
    assert not discovered.get_smry(column_keys="FOPT").empty
    assert "UNSMRY" in discovered.files["FILETYPE"].values

    # Now try again, with no autodiscovery
    manual = ScratchEnsemble("reektest", realization_glob, autodiscovery=False)
    assert manual.get_smry(column_keys="FOPT").empty
    manual.find_files("eclipse/model/*UNSMRY")
    assert not manual.get_smry(column_keys="FOPT").empty

    # Some very basic data is discovered even though we have autodiscovery=False
    assert "parameters.txt" in manual.keys()
    assert "STATUS" in manual.keys()

    # If these are unwanted, we can delete explicitly
    # (both a plain string and a list are given to remove_data):
    manual.remove_data("parameters.txt")
    manual.remove_data(["STATUS"])
    assert "parameters.txt" not in manual.keys()
    assert "STATUS" not in manual.keys()
def test_ens_premature_ecl(tmpdir):
    """Check an ensemble where Eclipse has failed early in realization 1

    The reference ensemble is copied into ``tmpdir`` and the UNSMRY file of
    realization-1 is replaced by its "-failed2000" variant. Raw and
    resampled summary data, statistics and delta ensembles are then compared
    between the original, the failed and a filtered ensemble.
    """

    def count_end_dates(smry):
        """Number of distinct per-realization maximal DATE values in smry"""
        return len(smry.groupby("REAL").max()["DATE"].unique())

    def max_fopt_real1(ensemble, freq):
        """Maximal FOPT in realization 1 of ensemble at the given time_index"""
        fopt = ensemble.get_smry(column_keys="FOPT", time_index=freq)
        return fopt.groupby("REAL").max()["FOPT"].loc[1]

    if "__file__" in globals():
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    origensemble = ScratchEnsemble(
        "origreek", testdir + "/data/testensemble-reek001/realization-*/iter-0"
    )
    raw_orig_smry = origensemble.load_smry()

    # Work on a copy of the ensemble inside tmpdir, so that the UNSMRY
    # file of realization-1 can be overwritten:
    tmpdir.chdir()
    shutil.copytree(testdir + "/data/testensemble-reek001", "ens_fail_real_reek001")
    unsmry_filename = (
        "ens_fail_real_reek001/realization-1/iter-0/eclipse/model/2_R001_REEK-1.UNSMRY"
    )
    shutil.copy(unsmry_filename + "-failed2000", unsmry_filename)

    failensemble = ScratchEnsemble(
        "failedreek", "ens_fail_real_reek001/realization-*/iter-0"
    )
    raw_fail_smry = failensemble.load_smry()

    # This is usually superfluous when raw datetimes are obtained.
    raw_orig_smry["DATE"] = pd.to_datetime(raw_orig_smry["DATE"])
    raw_fail_smry["DATE"] = pd.to_datetime(raw_fail_smry["DATE"])

    # Homogeneous max-date in orig smry:
    assert count_end_dates(raw_orig_smry) == 1
    # Different values for raw_fail:
    assert count_end_dates(raw_fail_smry) == 2
    # END statement in schedule file on 2000-08-01 yields this:
    last_date_real1 = raw_fail_smry.groupby("REAL").max()["DATE"].loc[1]
    assert str(last_date_real1) == "2000-08-01 00:00:00"

    # Filter away all those that did not make it to the end. In normal scenarios,
    # this would be accomplished by .filter('OK'), but not in this test context.
    max_date = str(failensemble.get_smry()["DATE"].max())
    filtered_fail_ensemble = failensemble.filter(
        "unsmry--raw", column="DATE", columncontains=max_date, inplace=False
    )
    assert len(filtered_fail_ensemble) == 4
    assert count_end_dates(filtered_fail_ensemble.get_smry()) == 1
    # Check also get_smry():
    assert count_end_dates(failensemble.get_smry()) == 2

    # With time_index set to something, then all realization will get
    # interpolated onto the same date range
    assert count_end_dates(failensemble.get_smry(time_index="monthly")) == 1
    # This is in fact *different* from what you would get from load_smry (issue #97)
    assert count_end_dates(failensemble.load_smry(time_index="monthly")) == 2
    # (this behaviour might change, get_smry() is allowed in
    # the future to mimic load_smry())

    # Check that FOPT is very much lower in real 1 in failed ensemble,
    # first for monthly and then also for yearly data:
    for freq in ("monthly", "yearly"):
        assert max_fopt_real1(failensemble, freq) < 1500000
        assert max_fopt_real1(origensemble, freq) > 6000000

    fail_foprs = failensemble.get_smry(column_keys="FOPR", time_index="monthly")

    # The FOPR rate vector should be all zero after the stop in real 1,
    # but not in real 0
    after_stop = fail_foprs["DATE"] > datetime.date(2000, 8, 1)
    stopped_rates = fail_foprs[(fail_foprs["REAL"] == 1) & after_stop]["FOPR"]
    assert stopped_rates.abs().sum() == 0
    running_rates = fail_foprs[(fail_foprs["REAL"] == 0) & after_stop]["FOPR"]
    assert running_rates.abs().sum() > 0

    # This frame treats the "failed" realization as correct,
    # and it will affect the stats:
    fail_stats = failensemble.get_smry_stats(time_index="monthly")
    # Here, real 1 is removed
    filtered_stats = filtered_fail_ensemble.get_smry_stats(time_index="monthly")
    # Original stats
    orig_stats = origensemble.get_smry_stats(time_index="monthly")

    # The 30 last rows are the rows from 2000-09-01 to 2003-02-01:
    fail_minimum = fail_stats.loc["minimum"]
    assert fail_minimum["FOPR"].iloc[-30:].abs().sum() == 0
    assert fail_minimum["FOPT"].iloc[-30:].unique()[0] == 1431247.125

    # Oh no, in filtered stats, the last date 2003-02-01 is
    # not included, probably a minor bug!
    # But that means that the indexing of the last 30 is a little bit rogue.
    # (this test should work even that bug is fixed)
    filtered_minimum = filtered_stats.loc["minimum"]
    assert filtered_minimum["FOPR"].iloc[-29:].abs().sum() > 0
    assert len(filtered_minimum["FOPT"].iloc[-29:].unique()) == 29

    # Mean FOPR and FOPT should be affected by the zero-padded rates:
    fail_mean = fail_stats.loc["mean"].iloc[-10]
    filtered_mean = filtered_stats.loc["mean"].iloc[-10]
    orig_mean = orig_stats.loc["mean"].iloc[-10]
    for vector in ("FOPR", "FOPT"):
        assert fail_mean[vector] < filtered_mean[vector]
        assert fail_mean[vector] < orig_mean[vector]

    # Delta profiles:
    delta_fail = origensemble - failensemble
    # Delta profiles are given for all realizations
    delta_fail_smry = delta_fail.get_smry()
    assert len(delta_fail_smry["REAL"].unique()) == 5
    # and they all end at the same ultimate date:
    assert count_end_dates(delta_fail_smry) == 1
    # BUT, there is only NaNs for values after 2000-08-01:
    real1_after_stop = delta_fail_smry[
        (delta_fail_smry["REAL"] == 1) & (delta_fail_smry["DATE"] > "2000-08-01")
    ]
    assert np.isnan(real1_after_stop["FOPT"].unique()[0])

    # Delta profiles after filtering:
    delta_filtered = origensemble - filtered_fail_ensemble
    assert len(origensemble) == 5
    assert len(filtered_fail_ensemble) == 4
    # TODO: when issue #83 is resolved, assert len(delta_filtered) == 4
    # directly (only four realizations); to_virtual() and time_index below
    # can be removed at the same time.
    delta_filtered_smry = delta_filtered.to_virtual().get_smry(time_index="monthly")
    # Should contain only four realizations, as one has been filtered away
    assert len(delta_filtered_smry["REAL"].unique()) == 4
    # Ultimate date is the same in all four:
    assert count_end_dates(delta_filtered_smry) == 1
def test_ensemble_ecl():
    """Eclipse specific functionality

    Exercises summary key lookup, loading of summary data at various time
    resolutions, date list generation, summary metadata, well and group
    names, ensemble deltas and summary statistics on the test ensemble.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    # Eclipse summary keys:
    assert len(reekensemble.get_smrykeys("FOPT")) == 1
    assert len(reekensemble.get_smrykeys("F*")) == 49
    assert len(reekensemble.get_smrykeys(["F*", "W*"])) == 49 + 280
    assert not reekensemble.get_smrykeys("BOGUS")

    # reading ensemble dataframe. The return value of this first,
    # unrestricted read was never inspected, so it is not bound to a name.
    reekensemble.load_smry(time_index="monthly")

    monthly = reekensemble.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "REAL"  # Enforce order of columns.
    assert monthly.columns[1] == "DATE"
    assert len(monthly) == 190

    # Check that the result was cached in memory, not necessarily on disk..
    assert isinstance(reekensemble.get_df("unsmry--monthly.csv"), pd.DataFrame)

    assert len(reekensemble.keys()) == 4

    # When asking the ensemble for FOPR, we also get REAL as a column
    # in return. Note that the internal stored version will be
    # overwritten by each load_smry()
    assert len(reekensemble.load_smry(column_keys=["FOPR"]).columns) == 3
    assert len(reekensemble.load_smry(column_keys=["FOP*"]).columns) == 11
    assert len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).columns) == 12

    # Check that there is now a cached version with raw dates:
    assert isinstance(reekensemble.get_df("unsmry--raw.csv"), pd.DataFrame)
    # The columns are not similar, this is allowed!

    # If you get 3205 here, it means that you are using the union of
    # raw dates from all realizations, which is not correct
    assert len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).index) == 1700

    # Date list handling:
    assert len(reekensemble.get_smry_dates(freq="report")) == 641
    assert len(reekensemble.get_smry_dates(freq="raw")) == 641
    assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
    assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
    assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
    assert len(reekensemble.get_smry_dates(freq="first")) == 1
    assert len(reekensemble.get_smry_dates(freq="last")) == 1

    # A start/end window wider than the data must not change first/last:
    assert reekensemble.get_smry_dates(freq="first") == reekensemble.get_smry_dates(
        freq="first", start_date="1900-01-01", end_date="2050-02-01"
    )
    assert reekensemble.get_smry_dates(freq="last") == reekensemble.get_smry_dates(
        freq="last", start_date="1900-01-01", end_date="2050-02-01"
    )

    assert str(reekensemble.get_smry_dates(freq="report")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="raw")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
    assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
    assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
    assert str(reekensemble.get_smry_dates(freq="first")[-1]) == "2000-01-01"
    assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"

    assert (
        str(reekensemble.get_smry_dates(freq="daily", end_date="2002-03-03")[-1])
        == "2002-03-03"
    )
    assert (
        str(reekensemble.get_smry_dates(freq="daily", start_date="2002-03-03")[0])
        == "2002-03-03"
    )

    # Start and end outside of orig data and on the "wrong side"
    dates = reekensemble.get_smry_dates(end_date="1999-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "1999-03-03"
    dates = reekensemble.get_smry_dates(start_date="2099-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "2099-03-03"

    # Time interpolated dataframes with summary data:
    yearly = reekensemble.get_smry_dates(freq="yearly")
    assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index=yearly)) == 25
    # NB: This is cached in unsmry-custom.csv, not unsmry--yearly!
    # This usage is discouraged. Use 'yearly' in such cases.

    # Check that we can shortcut get_smry_dates:
    assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly")) == 25

    assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="first")) == 5
    assert isinstance(reekensemble.get_df("unsmry--first.csv"), pd.DataFrame)

    assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last")) == 5
    assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)

    # Check that time_index=None and time_index="raw" behaves like default
    raw = reekensemble.load_smry(column_keys=["F*PT"], time_index="raw")
    assert reekensemble.load_smry(column_keys=["F*PT"]).iloc[3, 2] == raw.iloc[3, 2]
    assert (
        reekensemble.load_smry(column_keys=["F*PT"], time_index=None).iloc[3, 3]
        == raw.iloc[3, 3]
    )

    # Give ISO-dates directly:
    assert (
        len(reekensemble.get_smry(column_keys=["FOPR"], time_index="2001-01-02")) == 5
    )

    # Summary metadata:
    meta = reekensemble.get_smry_meta()
    assert len(meta) == len(reekensemble.get_smrykeys())
    assert "FOPT" in meta
    assert not meta["FOPT"]["is_rate"]
    assert meta["FOPT"]["is_total"]

    meta = reekensemble.get_smry_meta("FOPT")
    assert meta["FOPT"]["is_total"]

    meta = reekensemble.get_smry_meta("*")
    assert meta["FOPT"]["is_total"]

    meta = reekensemble.get_smry_meta(["*"])
    assert meta["FOPT"]["is_total"]

    meta = reekensemble.get_smry_meta(["FOPT", "BOGUS"])
    assert meta["FOPT"]["is_total"]
    assert "BOGUS" not in meta

    # Eclipse well names list
    assert len(reekensemble.get_wellnames("OP*")) == 5
    assert len(reekensemble.get_wellnames(None)) == 8
    assert len(reekensemble.get_wellnames()) == 8
    assert not reekensemble.get_wellnames("")
    assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8

    # eclipse well groups list
    assert len(reekensemble.get_groupnames()) == 3

    # delta between two ensembles
    diff = reekensemble - reekensemble
    assert len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5

    # eclipse summary vector statistics for a given ensemble
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPR", "FGPR"], time_index="monthly"
    )
    assert isinstance(df_stats, pd.DataFrame)
    assert len(df_stats.columns) == 2
    assert isinstance(df_stats["FOPR"]["mean"], pd.Series)
    assert len(df_stats["FOPR"]["mean"].index) == 38

    # check if wild cards also work for get_smry_stats
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOP*", "FGP*"], time_index="monthly"
    )
    assert len(df_stats.columns) == len(reekensemble.get_smrykeys(["FOP*", "FGP*"]))

    # Check webviz requirements for dataframe
    stats = df_stats.index.levels[0]
    assert "minimum" in stats
    assert "maximum" in stats
    assert "p10" in stats
    assert "p90" in stats
    assert "mean" in stats
    assert df_stats["FOPR"]["minimum"].iloc[-2] < df_stats["FOPR"]["maximum"].iloc[-2]

    # Check user supplied quantiles
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[0, 15, 50, 85, 100]
    )
    statistics = df_stats.index.levels[0]
    assert "p0" in statistics
    assert "p15" in statistics
    assert "p50" in statistics
    assert "p85" in statistics
    assert "p100" in statistics
    # For oil industry, p15 on FOPT should yield a larger value than p85.
    # But the quantiles we get out follows the rest of the world
    # so we check for the opposite.
    # .iloc is used for the last row: plain [-1] on a Series that is not
    # integer-indexed relies on a positional fallback that pandas has
    # deprecated, and .iloc matches the other positional lookups above.
    assert df_stats["FOPT"]["p85"].iloc[-1] > df_stats["FOPT"]["p15"].iloc[-1]

    with pytest.raises(ValueError):
        reekensemble.get_smry_stats(
            column_keys=["FOPT"], time_index="yearly", quantiles=["foobar"]
        )

    # With an empty quantile list, three statistics remain on level 0:
    noquantiles = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[]
    )
    assert len(noquantiles.index.levels[0]) == 3