def read_well_connection_status(
    ensemble_path: str, well_connection_status_file: str
) -> Optional[pd.DataFrame]:
    """Read well connection status csv files from the scratch disk.

    fmu-ensemble (``ScratchEnsemble``) is used to locate the files on the
    scratch disk. The files from all realizations are merged into a single
    dataframe with a ``REAL`` column, and some column data types are fixed.

    The well connection status data is extracted from the CPI data, which is
    0 if the connection is SHUT and >0 if the connection is OPEN. This is
    independent of the status of the well.

    Args:
        ensemble_path: Path (glob pattern) passed on to ``ScratchEnsemble``.
        well_connection_status_file: File pattern passed on to
            ``ScratchEnsemble.find_files``.

    Returns:
        The merged dataframe, or ``None`` if no matching files were found.
        The csv files must provide the columns ``I``, ``J``, ``K`` and
        ``DATE``; ``K`` is returned under the name ``K1``, and ``DATE`` holds
        ``datetime.date`` objects.
    """
    ens = ScratchEnsemble("ens", ensemble_path)
    df_files = ens.find_files(well_connection_status_file)

    if df_files.empty:
        return None

    # Collect the per-realization frames and concatenate once. Growing the
    # dataframe with pd.concat inside the loop would copy all accumulated
    # rows again on every iteration.
    frames = []
    for _, row in df_files.iterrows():
        df_real = pd.read_csv(row.FULLPATH)
        df_real["REAL"] = row.REAL
        frames.append(df_real)
    df = pd.concat(frames)

    df["I"] = pd.to_numeric(df["I"])
    df["J"] = pd.to_numeric(df["J"])
    # The K column is converted to numeric and exposed as K1
    df["K1"] = pd.to_numeric(df["K"])
    df = df.drop(["K"], axis=1)
    df["DATE"] = pd.to_datetime(df["DATE"]).dt.date
    return df
def test_ens_failedreals():
    """Check that mismatch can be computed when some realizations lack
    UNSMRY data"""
    # When __file__ is undefined (e.g. the code was pasted into an
    # interactive session), fall back to the current working directory
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    realization_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    )

    ensemble = ScratchEnsemble("test", realization_glob, autodiscovery=False)
    observations = Observations(
        {"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]}
    )

    # No UNSMRY files have been discovered yet, so the mismatch
    # is expected to be empty:
    initial_mismatch = observations.mismatch(ensemble)
    assert initial_mismatch.empty

    # After explicit discovery there is something to compare against
    ensemble.find_files("eclipse/model/*UNSMRY")
    assert not observations.mismatch(ensemble).empty

    # Start over with a fresh ensemble object
    ensemble = ScratchEnsemble("test", realization_glob, autodiscovery=False)

    # Discover UNSMRY files, then redirect the UNSMRY pointer in
    # realization 3 so that it isn't found
    ensemble.find_files("eclipse/model/*UNSMRY")
    files_real3 = ensemble[3].files
    unsmry_rows = files_real3["FILETYPE"] == "UNSMRY"
    files_real3.loc[unsmry_rows, "FULLPATH"] = "FOO"

    # EclSum should be available for realization 2 but not for 3:
    assert ensemble[2].get_eclsum()
    assert not ensemble[3].get_eclsum()

    # Realization 3 must be absent from the mismatch now
    mismatch_without_real3 = observations.mismatch(ensemble)
    assert 3 not in list(mismatch_without_real3["REAL"])
    assert not observations.mismatch(ensemble).empty
def test_noautodiscovery():
    """Verify that auto-discovery of UNSMRY files is fully controllable"""
    # When __file__ is undefined (e.g. the code was pasted into an
    # interactive session), fall back to the current working directory
    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    realization_glob = (
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )

    # Constructing the ensemble with defaults includes auto-discovery,
    # verify that summary data and UNSMRY files are in place:
    ensemble = ScratchEnsemble("reektest", realization_glob)
    assert not ensemble.get_smry(column_keys="FOPT").empty
    assert "UNSMRY" in ensemble.files["FILETYPE"].values

    # Same ensemble again, this time with auto-discovery switched off
    ensemble = ScratchEnsemble(
        "reektest", realization_glob, autodiscovery=False
    )
    assert ensemble.get_smry(column_keys="FOPT").empty
    ensemble.find_files("eclipse/model/*UNSMRY")
    assert not ensemble.get_smry(column_keys="FOPT").empty

    always_loaded = ("parameters.txt", "STATUS")

    # Some very basic data is present even with autodiscovery=False
    for key in always_loaded:
        assert key in ensemble.keys()

    # Such data can be removed explicitly if it is unwanted
    # (both a single key and a list of keys are passed):
    ensemble.remove_data("parameters.txt")
    ensemble.remove_data(["STATUS"])
    for key in always_loaded:
        assert key not in ensemble.keys()
def test_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble

    Covers file discovery, loading of STATUS, txt and csv data, and
    removal/re-addition of realizations.

    Args:
        tmp: Directory that csv dumps of the loaded dataframes are written
            to. It is created if it does not exist.
    """
    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekensemble = ScratchEnsemble(
        "reektest",
        testdir + "/data/testensemble-reek001/" + "realization-*/iter-0",
    )
    assert isinstance(reekensemble, ScratchEnsemble)
    assert reekensemble.name == "reektest"
    assert len(reekensemble) == 5

    assert isinstance(reekensemble[0], ScratchRealization)

    assert (
        len(reekensemble.files[reekensemble.files.LOCALPATH == "jobs.json"])
        == 5
    )
    assert (
        len(
            reekensemble.files[
                reekensemble.files.LOCALPATH == "parameters.txt"
            ]
        )
        == 5
    )
    assert (
        len(reekensemble.files[reekensemble.files.LOCALPATH == "STATUS"]) == 5
    )

    statusdf = reekensemble.get_df("STATUS")
    assert len(statusdf) == 250  # 5 realizations, 50 jobs in each
    assert "REAL" in statusdf.columns
    assert "FORWARD_MODEL" in statusdf.columns
    statusdf = statusdf.set_index(["REAL", "FORWARD_MODEL"]).sort_index()
    assert "DURATION" in statusdf.columns  # calculated
    assert "argList" in statusdf.columns  # from jobs.json

    # Sample check the duration for RMS in realization 4:
    assert int(statusdf.loc[4, "RMS_BATCH"]["DURATION"].values[0]) == 195

    # STATUS in real4 is modified to simulate that Eclipse never finished:
    assert numpy.isnan(
        statusdf.loc[4, "ECLIPSE100_2014.2"]["DURATION"].values[0]
    )

    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(tmp, exist_ok=True)
    statusdf.to_csv(os.path.join(tmp, "status.csv"), index=False)

    # Parameters.txt
    paramsdf = reekensemble.load_txt("parameters.txt")
    assert len(paramsdf) == 5  # 5 realizations
    paramsdf = reekensemble.parameters  # also test as property
    paramsdf = reekensemble.get_df("parameters.txt")
    assert len(paramsdf) == 5
    assert len(paramsdf.columns) == 26  # 25 parameters, + REAL column
    paramsdf.to_csv(os.path.join(tmp, "params.csv"), index=False)

    # Check that the ensemble object has not tainted the realization dataframe:
    assert "REAL" not in reekensemble._realizations[0].get_df("parameters.txt")

    # The column FOO in parameters is only present in some, and
    # is present with NaN in real0:
    assert "FOO" in reekensemble.parameters.columns
    assert len(reekensemble.parameters["FOO"].dropna()) == 1
    # (NaN in one real, and non-existing in the others is the same thing)

    # Test loading of another txt file:
    reekensemble.load_txt("outputs.txt")
    assert "NPV" in reekensemble.load_txt("outputs.txt").columns
    # Check implicit discovery
    assert "outputs.txt" in reekensemble.files["LOCALPATH"].values
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # File discovery:
    csvvolfiles = reekensemble.find_files(
        "share/results/volumes/*csv", metadata={"GRID": "simgrid"}
    )
    assert isinstance(csvvolfiles, pd.DataFrame)
    assert "REAL" in csvvolfiles
    assert "FULLPATH" in csvvolfiles
    assert "LOCALPATH" in csvvolfiles
    assert "BASENAME" in csvvolfiles
    # Check the explicit metadata:
    assert "GRID" in csvvolfiles
    # Compare as a list: asserting on an array comparison raises an
    # ambiguous truth value error if more than one unique value is present
    assert list(csvvolfiles["GRID"].unique()) == ["simgrid"]

    reekensemble.files.to_csv(os.path.join(tmp, "files.csv"), index=False)

    # Check that rediscovery does not mess things up:
    filecount = len(reekensemble.files)
    newfiles = reekensemble.find_files("share/results/volumes/*csv")
    # Also note that we skipped metadata here in rediscovery:

    assert len(reekensemble.files) == filecount
    assert len(newfiles) == len(csvvolfiles)

    # The last invocation of find_files() should not return the metadata
    assert len(newfiles.columns) + 1 == len(csvvolfiles.columns)

    # FULLPATH should always contain absolute paths
    assert all(os.path.isabs(x) for x in reekensemble.files["FULLPATH"])

    # The metadata in the rediscovered files should have been removed
    assert (
        len(reekensemble.files[reekensemble.files["GRID"] == "simgrid"]) == 0
    )

    # CSV files
    csvpath = "share/results/volumes/simulator_volume_fipnum.csv"
    vol_df = reekensemble.load_csv(csvpath)

    # Check that we have not tainted the realization dataframes:
    assert "REAL" not in reekensemble._realizations[0].get_df(csvpath)

    assert "REAL" in vol_df
    assert len(vol_df["REAL"].unique()) == 3  # missing in 2 reals
    vol_df.to_csv(os.path.join(tmp, "simulatorvolumes.csv"), index=False)

    # Test retrieval of cached data
    vol_df2 = reekensemble.get_df(csvpath)

    assert "REAL" in vol_df2
    assert len(vol_df2["REAL"].unique()) == 3  # missing in 2 reals

    # Realization deletion:
    reekensemble.remove_realizations([1, 3])
    assert len(reekensemble) == 3

    # Re-add the same realizations
    reekensemble.add_realizations(
        [
            testdir + "/data/testensemble-reek001/" + "realization-1/iter-0",
            testdir + "/data/testensemble-reek001/" + "realization-3/iter-0",
        ]
    )
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24

    # File discovery must be repeated for the newly added realizations
    reekensemble.find_files(
        "share/results/volumes/" + "simulator_volume_fipnum.csv",
        metadata={"GRID": "simgrid"},
    )
    assert len(reekensemble.files) == 25
    # Test addition of already added realization:
    reekensemble.add_realizations(
        testdir + "/data/testensemble-reek001/" + "realization-1/iter-0"
    )
    assert len(reekensemble) == 5
    assert len(reekensemble.files) == 24  # discovered files are lost!

    keycount = len(reekensemble.keys())
    reekensemble.remove_data("parameters.txt")
    assert len(reekensemble.keys()) == keycount - 1