def test_eclsumcaching():
    """Test caching of eclsum

    Each summary-reading method of the ensemble is called twice: first with
    default arguments, after which every realization must hold an ``_eclsum``
    object, and then with ``cache_eclsum=False``, after which no realization
    may hold one.
    """
    if "__file__" in globals():
        # Make it easier to copy test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    dirs = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    ens = ScratchEnsemble("reektest", dirs)

    def all_cached():
        """Return True when every realization has a truthy _eclsum."""
        return all(real._eclsum for real in ens.realizations.values())

    def none_cached():
        """Return True when no realization has a truthy _eclsum."""
        return not any(real._eclsum for real in ens.realizations.values())

    # The problem here is if you load in a lot of UNSMRY files
    # and the Python process keeps them in memory. Not sure
    # how to check in code that an object has been garbage collected
    # but for garbage collection to work, at least the realization
    # _eclsum variable must be None.

    ens.load_smry()
    # Default is to do caching, so these will not be None:
    assert all_cached()

    # If we redo this operation, the same objects should all
    # be None afterwards:
    ens.load_smry(cache_eclsum=False)
    # cache_eclsum==None is from v1.1.5 no longer equivalent to False
    assert none_cached()

    ens.get_smry()
    assert all_cached()

    ens.get_smry(cache_eclsum=False)
    assert none_cached()

    ens.get_smry_stats()
    assert all_cached()

    ens.get_smry_stats(cache_eclsum=False)
    assert none_cached()

    ens.get_smry_dates()
    assert all_cached()

    # Clear the cached objects because the statement above has cached them.
    for realization in ens.realizations.values():
        realization._eclsum = None

    ens.get_smry_dates(cache_eclsum=False)
    assert none_cached()
def test_emptyens():
    """Verify that an ensemble without realizations can be created and queried.

    Every accessor called on the empty ensemble must hand back an empty
    container of the expected type. Finally a single realization is added
    by hand and the ensemble length is checked.
    """
    ens = ScratchEnsemble("emptyens")
    assert not ens

    # Fall back to the working directory when __file__ is undefined, which
    # makes it easier to paste this test into an interactive session.
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )

    smry_frame = ens.get_smry()
    assert isinstance(smry_frame, pd.DataFrame)
    assert smry_frame.empty

    smry_dates = ens.get_smry_dates()
    assert isinstance(smry_dates, list)
    assert not smry_dates

    smry_keys = ens.get_smrykeys()
    assert isinstance(smry_keys, list)
    assert not smry_keys

    rates_frame = ens.get_volumetric_rates()
    assert isinstance(rates_frame, pd.DataFrame)
    assert rates_frame.empty

    stats_frame = ens.get_smry_stats()
    assert isinstance(stats_frame, pd.DataFrame)
    assert stats_frame.empty

    well_names = ens.get_wellnames()
    assert isinstance(well_names, list)
    assert not well_names

    group_names = ens.get_groupnames()
    assert isinstance(group_names, list)
    assert not group_names

    # Metadata lookup: no argument, a wildcard, a single key, a list of keys.
    for meta_args in ((), ("*",), ("FOPT",), (["FOPT"],)):
        smry_meta = ens.get_smry_meta(*meta_args)
        assert isinstance(smry_meta, dict)
        assert not smry_meta

    # Add a realization manually:
    realization_dir = testdir + "/data/testensemble-reek001/" + "realization-0/iter-0"
    ens.add_realizations(realization_dir)
    assert len(ens) == 1
def test_ensemble_ecl():
    """Eclipse specific functionality

    Exercises summary key lookup, loading of summary dataframes at various
    time resolutions, date list handling, well and group name listing,
    ensemble subtraction and summary statistics on the Reek test ensemble.
    """
    if "__file__" in globals():
        # Make it easier to copy test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    # Eclipse summary keys:
    assert len(reekensemble.get_smrykeys("FOPT")) == 1
    assert len(reekensemble.get_smrykeys("F*")) == 49
    assert len(reekensemble.get_smrykeys(["F*", "W*"])) == 49 + 280
    assert not reekensemble.get_smrykeys("BOGUS")

    # reading ensemble dataframe
    # (first load uses the default column_keys; its return value is not inspected)
    reekensemble.load_smry(time_index="monthly")

    monthly = reekensemble.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "REAL"  # Enforce order of columns.
    assert monthly.columns[1] == "DATE"
    assert len(monthly) == 190

    # Check that the result was cached in memory, not necessarily on disk..
    assert isinstance(reekensemble.get_df("unsmry--monthly.csv"), pd.DataFrame)

    assert len(reekensemble.keys()) == 4

    # When asking the ensemble for FOPR, we also get REAL as a column
    # in return. Note that the internal stored version will be
    # overwritten by each load_smry()
    assert len(reekensemble.load_smry(column_keys=["FOPR"]).columns) == 3
    assert len(reekensemble.load_smry(column_keys=["FOP*"]).columns) == 11
    assert len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).columns) == 12

    # Check that there is now a cached version with raw dates:
    assert isinstance(reekensemble.get_df("unsmry--raw.csv"), pd.DataFrame)

    # The columns are not similar, this is allowed!

    # If you get 3205 here, it means that you are using the union of
    # raw dates from all realizations, which is not correct
    assert len(reekensemble.load_smry(column_keys=["FGPR", "FOP*"]).index) == 1700

    # Date list handling:
    assert len(reekensemble.get_smry_dates(freq="report")) == 641
    assert len(reekensemble.get_smry_dates(freq="raw")) == 641
    assert len(reekensemble.get_smry_dates(freq="yearly")) == 5
    assert len(reekensemble.get_smry_dates(freq="monthly")) == 38
    assert len(reekensemble.get_smry_dates(freq="daily")) == 1098
    assert len(reekensemble.get_smry_dates(freq="last")) == 1
    assert reekensemble.get_smry_dates(freq="last") == reekensemble.get_smry_dates(
        freq="last", end_date="2050-02-01"
    )

    assert str(reekensemble.get_smry_dates(freq="report")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="raw")[-1]) == "2003-01-02 00:00:00"
    assert str(reekensemble.get_smry_dates(freq="yearly")[-1]) == "2004-01-01"
    assert str(reekensemble.get_smry_dates(freq="monthly")[-1]) == "2003-02-01"
    assert str(reekensemble.get_smry_dates(freq="daily")[-1]) == "2003-01-02"
    assert str(reekensemble.get_smry_dates(freq="last")[-1]) == "2003-01-02"

    assert (
        str(reekensemble.get_smry_dates(freq="daily", end_date="2002-03-03")[-1])
        == "2002-03-03"
    )
    assert (
        str(reekensemble.get_smry_dates(freq="daily", start_date="2002-03-03")[0])
        == "2002-03-03"
    )

    # Start and end outside of orig data and on the "wrong side"
    dates = reekensemble.get_smry_dates(end_date="1999-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "1999-03-03"

    dates = reekensemble.get_smry_dates(start_date="2099-03-03")
    assert len(dates) == 1
    assert str(dates[0]) == "2099-03-03"

    # Time interpolated dataframes with summary data:
    yearly = reekensemble.get_smry_dates(freq="yearly")
    assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index=yearly)) == 25
    # NB: This is cached in unsmry-custom.csv, not unsmry--yearly!
    # This usage is discouraged. Use 'yearly' in such cases.

    # Check that we can shortcut get_smry_dates:
    assert len(reekensemble.load_smry(column_keys=["FOPT"], time_index="yearly")) == 25

    assert len(reekensemble.load_smry(column_keys=["FOPR"], time_index="last")) == 5
    assert isinstance(reekensemble.get_df("unsmry--last.csv"), pd.DataFrame)

    # Eclipse well names list
    assert len(reekensemble.get_wellnames("OP*")) == 5
    assert len(reekensemble.get_wellnames(None)) == 8
    assert len(reekensemble.get_wellnames()) == 8
    assert not reekensemble.get_wellnames("")
    assert len(reekensemble.get_wellnames(["OP*", "WI*"])) == 8

    # eclipse well groups list
    assert len(reekensemble.get_groupnames()) == 3

    # delta between two ensembles
    diff = reekensemble - reekensemble
    assert len(diff.get_smry(column_keys=["FOPR", "FGPR", "FWCT"]).columns) == 5

    # eclipse summary vector statistics for a given ensemble
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPR", "FGPR"], time_index="monthly"
    )
    assert isinstance(df_stats, pd.DataFrame)
    assert len(df_stats.columns) == 2
    assert isinstance(df_stats["FOPR"]["mean"], pd.Series)
    assert len(df_stats["FOPR"]["mean"].index) == 38

    # check if wild cards also work for get_smry_stats
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOP*", "FGP*"], time_index="monthly"
    )
    assert len(df_stats.columns) == len(reekensemble.get_smrykeys(["FOP*", "FGP*"]))

    # Check webviz requirements for dataframe
    stats = df_stats.index.levels[0]
    assert "minimum" in stats
    assert "maximum" in stats
    assert "p10" in stats
    assert "p90" in stats
    assert "mean" in stats
    assert df_stats["FOPR"]["minimum"].iloc[-2] < df_stats["FOPR"]["maximum"].iloc[-2]

    # Check user supplied quantiles
    df_stats = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[0, 15, 50, 85, 100]
    )
    statistics = df_stats.index.levels[0]
    assert "p0" in statistics
    assert "p15" in statistics
    assert "p50" in statistics
    assert "p85" in statistics
    assert "p100" in statistics

    # For oil industry, p15 on FOPT should yield a larger value than p85.
    # But the quantiles we get out follows the rest of the world
    # so we check for the opposite.
    # Use .iloc for positional access: a bare integer key on a Series is
    # treated as a label by newer pandas versions.
    assert df_stats["FOPT"]["p85"].iloc[-1] > df_stats["FOPT"]["p15"].iloc[-1]

    with pytest.raises(ValueError):
        reekensemble.get_smry_stats(
            column_keys=["FOPT"], time_index="yearly", quantiles=["foobar"]
        )

    noquantiles = reekensemble.get_smry_stats(
        column_keys=["FOPT"], time_index="yearly", quantiles=[]
    )
    assert len(noquantiles.index.levels[0]) == 3