import glob
import os

import pandas as pd
import pytest

from fmu.ensemble import EnsembleSet, ScratchEnsemble

# fmu.tools is optional; volumetrics-dependent assertions are skipped
# when it is not installed.
try:
    from fmu.tools import volumetrics

    SKIP_FMU_TOOLS = False
except ImportError:
    SKIP_FMU_TOOLS = True


def test_ensembleset_reek001(tmpdir):
    """Test import of a stripped 5 realization ensemble,
    manually doubled to two identical ensembles
    """
    if "__file__" in globals():
        # Ease copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    ensdir = os.path.join(testdir, "data/testensemble-reek001/")
    tmpdir.chdir()
    symlink_iter(ensdir, "iter-0")
    symlink_iter(ensdir, "iter-1")

    iter0 = ScratchEnsemble("iter-0", str(tmpdir.join("realization-*/iter-0")))
    iter1 = ScratchEnsemble("iter-1", str(tmpdir.join("realization-*/iter-1")))
    ensset = EnsembleSet("reek001", [iter0, iter1])
    assert len(ensset) == 2
    assert len(ensset["iter-0"].get_df("STATUS")) == 250
    assert len(ensset["iter-1"].get_df("STATUS")) == 250

    # Try adding the same object over again
    try:
        ensset.add_ensemble(iter0)
    except ValueError:
        pass
    assert len(ensset) == 2  # Unchanged!

    # Initializing nothing, we get a warning about the missing name
    noname = EnsembleSet()
    assert noname.name  # not None
    assert isinstance(noname.name, str)  # And it should be a string

    # Initialize starting from an empty list of ensembles
    ensset2 = EnsembleSet("reek001", [])
    assert ensset2.name == "reek001"
    ensset2.add_ensemble(iter0)
    ensset2.add_ensemble(iter1)
    assert len(ensset2) == 2

    # Check that we can skip the empty list:
    ensset2x = EnsembleSet("reek001")
    ensset2x.add_ensemble(iter0)
    ensset2x.add_ensemble(iter1)
    assert len(ensset2x) == 2

    # Initialize directly from path with globbing:
    ensset3 = EnsembleSet("reek001direct", [])
    assert ensset3.name == "reek001direct"
    ensset3.add_ensembles_frompath(".")
    assert len(ensset3) == 2

    # Alternative globbing:
    ensset4 = EnsembleSet("reek001direct2", frompath=".")
    assert len(ensset4) == 2

    # Testing aggregation of parameters
    paramsdf = ensset3.parameters
    paramsdf.to_csv("enssetparams.csv", index=False)
    assert isinstance(paramsdf, pd.DataFrame)
    assert len(ensset3.parameters) == 10
    assert len(ensset3.parameters.columns) == 27
    assert "ENSEMBLE" in ensset3.parameters.columns
    assert "REAL" in ensset3.parameters.columns

    outputs = ensset3.load_txt("outputs.txt")
    assert "NPV" in outputs.columns

    # Test Eclipse summary handling:
    assert len(ensset3.get_smry_dates(freq="report")) == 641
    assert len(ensset3.get_smry_dates(freq="monthly")) == 37
    assert len(ensset3.load_smry(column_keys=["FOPT"], time_index="yearly")) == 50

    monthly = ensset3.load_smry(column_keys=["F*"], time_index="monthly")
    assert monthly.columns[0] == "ENSEMBLE"
    assert monthly.columns[1] == "REAL"
    assert monthly.columns[2] == "DATE"

    raw = ensset3.load_smry(column_keys=["F*PT"], time_index="raw")
    assert ensset3.load_smry(column_keys=["F*PT"]).iloc[3, 4] == raw.iloc[3, 4]
    assert (
        ensset3.load_smry(column_keys=["F*PT"], time_index=None).iloc[3, 5]
        == raw.iloc[3, 5]
    )

    # Eclipse well names
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames("WI*")) == 3
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames()) == 8

    # Check that we can retrieve cached versions
    assert len(ensset3.get_df("unsmry--monthly")) == 380
    assert len(ensset3.get_df("unsmry--yearly")) == 50
    monthly.to_csv("ensset-monthly.csv", index=False)

    # Test merging in get_df()
    param_output = ensset3.get_df("parameters.txt", merge="outputs")
    assert "top_structure" in param_output
    assert "SORG1" in param_output

    smry_params = ensset3.get_df("unsmry--monthly", merge="parameters")
    assert "SORG1" in smry_params
    assert "FWCT" in smry_params

    # Merging with something that does not exist:
    with pytest.raises(KeyError):
ensset3.get_df("unsmry--monthly", merge="barrFF") with pytest.raises((KeyError, ValueError)): ensset3.get_df("unsmry--weekly") # Check errors when we ask for stupid stuff with pytest.raises((KeyError, ValueError)): ensset3.load_csv("bogus.csv") with pytest.raises((KeyError, ValueError)): ensset3.get_df("bogus.csv") # Check get_smry() smry = ensset3.get_smry(time_index="yearly", column_keys=["FWCT", "FGOR"], end_date="2002-02-01") assert "ENSEMBLE" in smry assert "REAL" in smry assert len(smry["ENSEMBLE"].unique()) == 2 assert len(smry["REAL"].unique()) == 5 assert "FWCT" in smry assert "FGOR" in smry assert "DATE" in smry assert len(smry) == 40 # Eclipse well names list assert len(ensset3.get_wellnames("OP*")) == 5 assert len(ensset3.get_wellnames(None)) == 8 assert len(ensset3.get_wellnames()) == 8 assert not ensset3.get_wellnames("") assert len(ensset3.get_wellnames(["OP*", "WI*"])) == 8 # Test aggregation of csv files: vol_df = ensset3.load_csv("share/results/volumes/" + "simulator_volume_fipnum.csv") assert "REAL" in vol_df assert "ENSEMBLE" in vol_df assert len(vol_df["REAL"].unique()) == 3 assert len(vol_df["ENSEMBLE"].unique()) == 2 assert len(ensset3.keys()) == 8 # Test scalar imports: ensset3.load_scalar("npv.txt") npv = ensset3.get_df("npv.txt") assert "ENSEMBLE" in npv assert "REAL" in npv assert "npv.txt" in npv assert len(npv) == 10 # Scalar import with forced numerics: ensset3.load_scalar("npv.txt", convert_numeric=True, force_reread=True) npv = ensset3.get_df("npv.txt") assert len(npv) == 8 predel_len = len(ensset3.keys()) ensset3.drop("parameters.txt") assert len(ensset3.keys()) == predel_len - 1 # Test callback functionality, that we can convert rms # volumetrics in each realization. First we need a # wrapper which is able to work on ScratchRealizations. def rms_vol2df(kwargs): """Callback function to be sent to ensemble objects""" fullpath = os.path.join(kwargs["realization"].runpath(), kwargs["filename"]) # The supplied callback should not fail too easy. 
        if os.path.exists(fullpath):
            return volumetrics.rmsvolumetrics_txt2df(fullpath)
        return pd.DataFrame()

    if not SKIP_FMU_TOOLS:
        rmsvols_df = ensset3.apply(
            rms_vol2df, filename="share/results/volumes/geogrid_vol_oil_1.txt"
        )
        assert rmsvols_df["STOIIP_OIL"].sum() > 0
        assert len(rmsvols_df["REAL"].unique()) == 4
        assert len(rmsvols_df["ENSEMBLE"].unique()) == 2

        # Test that we can dump to disk as well and load from csv:
        ensset3.apply(
            rms_vol2df,
            filename="share/results/volumes/geogrid_vol_oil_1.txt",
            localpath="share/results/volumes/geogrid--oil.csv",
            dumptodisk=True,
        )
        geogrid_oil = ensset3.load_csv("share/results/volumes/geogrid--oil.csv")
        assert len(geogrid_oil["REAL"].unique()) == 4
        assert len(geogrid_oil["ENSEMBLE"].unique()) == 2

    # Initialize differently, using only the root path containing
    # realization-*
    ensset4 = EnsembleSet("foo", frompath=".")
    assert len(ensset4) == 2
    assert isinstance(ensset4["iter-0"], ScratchEnsemble)
    assert isinstance(ensset4["iter-1"], ScratchEnsemble)

    # Try the batch command feature:
    ensset5 = EnsembleSet(
        "reek001",
        frompath=".",
        batch=[
            {"load_scalar": {"localpath": "npv.txt"}},
            {"load_smry": {"column_keys": "FOPT", "time_index": "yearly"}},
            {"load_smry": {"column_keys": "*", "time_index": "daily"}},
        ],
    )
    assert len(ensset5.get_df("npv.txt")) == 10
    assert len(ensset5.get_df("unsmry--yearly")) == 50
    assert len(ensset5.get_df("unsmry--daily")) == 10980

    # Try batch processing after initialization:
    ensset6 = EnsembleSet("reek001", frompath=".")
    ensset6.process_batch(
        batch=[
            {"load_scalar": {"localpath": "npv.txt"}},
            {"load_smry": {"column_keys": "FOPT", "time_index": "yearly"}},
            {"load_smry": {"column_keys": "*", "time_index": "daily"}},
        ]
    )
    assert len(ensset6.get_df("npv.txt")) == 10
    assert len(ensset6.get_df("unsmry--yearly")) == 50
    assert len(ensset6.get_df("unsmry--daily")) == 10980
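
# The test above calls a symlink_iter() helper that is defined elsewhere in
# the real test module. What follows is a minimal sketch under that
# assumption: symlink every realization's iter-0 from the source ensemble
# into the current working directory under a new iteration name. Placing it
# after the test function is fine, since Python resolves the name at call
# time, not at definition time.
def symlink_iter(origensdir, newitername):
    """Sketch of a helper assuming the layout <ensdir>/realization-*/iter-0"""
    for realizationdir in glob.glob(os.path.join(origensdir, "realization-*")):
        realizationname = os.path.basename(realizationdir)
        os.makedirs(realizationname, exist_ok=True)
        linkname = os.path.join(realizationname, newitername)
        if not os.path.exists(linkname):
            os.symlink(os.path.join(realizationdir, "iter-0"), linkname)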
from fmu.ensemble import EnsembleSet

# Gather selected CSV files from each realization and dump them
# (each individually merged with parameters.txt) to share/results
ens = EnsembleSet(frompath="realization-*/iter-*")
csv_files = [
    "volumes/geogrid--oil.csv",
    "volumes/simgrid--oil.csv",
    "volumes/simulator_volume_fipnum.csv",
    "tables/rft.csv",
    "tables/unsmry--monthly.csv",
    "tables/equil.csv",
    "tables/relperm.csv",
    "tables/pvt.csv",
]
for csv_file in csv_files:
    ens.load_csv("share/results/" + csv_file)
    ens.get_df(csv_file.split("/")[1], merge="parameters.txt").to_csv(
        "share/results/" + csv_file, index=False
    )
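
# The csv_file.split("/")[1] lookup above retrieves loaded data by its bare
# filename. get_df() also accepts the full localpath used in load_csv() as the
# key; a sketch of the equivalent explicit call for one of the files, assuming
# the same loaded data (the short form relies on the basename being
# unambiguous among loaded keys):
merged = ens.get_df(
    "share/results/volumes/geogrid--oil.csv", merge="parameters.txt"
)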