def test_nonstandard_dirs(tmpdir):
    """Check ensemble initialization from directories with unusual names.

    Three realization directories named ``bar_00N/iter_003`` are created in a
    temporary directory. The test expects an empty ensemble unless a
    ``realidxregexp`` that captures the realization index is supplied.
    """
    tmpdir.chdir()
    ensdir = "foo-ens-bar/"
    os.makedirs(ensdir)
    for realdir in ("/bar_001/iter_003", "/bar_002/iter_003", "/bar_003/iter_003"):
        os.makedirs(ensdir + realdir)
    enspaths = ensdir + "/bar_*/iter_003"

    # No regular expression given: the ensemble is expected to be empty
    # (the logger should also print CRITICAL statements here).
    default_ens = ScratchEnsemble("foo", enspaths)
    assert not default_ens

    # With a regular expression capturing the index, all three are found.
    matched_ens = ScratchEnsemble("foo", enspaths, realidxregexp=r"bar_(\d+)")
    assert len(matched_ens) == 3

    # A regular expression that does not fit again yields an empty ensemble.
    unmatched_ens = ScratchEnsemble("foo", enspaths, realidxregexp="bar_xx")
    assert not unmatched_ens
def test_reek():
    """Import the reek ensemble and apply ecl2df functions on the realizations.

    The test is skipped when ecl2df is not available (``HAVE_ECL2DF``).
    """
    if "__file__" in globals():
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    reekens = ScratchEnsemble(
        "reektest", testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    )
    if not HAVE_ECL2DF:
        pytest.skip()

    def extract_compdat(kwargs):
        """Callback function to extract compdat data using ecl2df
        on a ScratchRealization.

        Returns None (after printing a message) when the realization
        cannot provide an EclFiles object.
        """
        eclfiles = kwargs["realization"].get_eclfiles()
        if not eclfiles:
            print(
                "Could not obtain EclFiles object for realization "
                + str(kwargs["realization"].index)
            )
            return None
        return ecl2df.compdat.deck2dfs(eclfiles.get_ecldeck())["COMPDAT"]

    allcompdats = reekens.apply(extract_compdat)
    assert not allcompdats.empty
    # "x in series" tests the index labels of a pandas Series, not its data.
    # Look at the values so that realization 0 itself is checked.
    assert 0 in allcompdats["REAL"].values
    assert "KH" in allcompdats
def _load_smry_dataframe_using_fmu(
    ens_path: str, frequency: Optional[Frequency]
) -> pd.DataFrame:
    """Load summary data for the ensemble at ``ens_path`` through fmu-ensemble.

    Args:
        ens_path: Path (pattern) handed to ``ScratchEnsemble`` as ``paths``.
        frequency: Sampling frequency; its ``value`` is used as ``time_index``.
            When falsy, the ``"raw"`` time index is requested.

    Returns:
        The summary DataFrame sorted on ``REAL`` then ``DATE`` with a fresh
        0-based index, float columns downcast and ``REAL`` stored as int32.
    """
    time_index: str = frequency.value if frequency else "raw"
    print(f"## Loading data into DataFrame using FMU time_index={time_index}...")

    ensemble = ScratchEnsemble("tempEnsName", paths=ens_path)
    smry_df = _make_date_column_datetime_object(
        ensemble.load_smry(time_index=time_index)
    )

    # Shrink the frame: downcast float columns and store REAL as int32.
    float_columns = smry_df.select_dtypes("float").columns
    smry_df[float_columns] = smry_df[float_columns].apply(
        pd.to_numeric, downcast="float"
    )
    smry_df["REAL"] = smry_df["REAL"].astype("int32")

    # Sort on real, then date to align with provider.
    return smry_df.sort_values(by=["REAL", "DATE"]).reset_index(drop=True)
def test_smry_via_ecl2df():
    """Check that ecl2df can stand in for the native smry extraction.

    Summary data obtained through an ``apply()`` callback using ecl2df is
    compared with ``get_smry()`` from fmu-ensemble itself.
    """

    def get_smry(kwargs):
        """Callback extracting smry data with ecl2df from a ScratchRealization."""
        return ecl2df.summary.df(
            kwargs["realization"].get_eclfiles(),
            time_index=kwargs["time_index"],
            column_keys=kwargs["column_keys"],
        )

    testdir = (
        os.path.dirname(os.path.abspath(__file__))
        if "__file__" in globals()
        else os.path.abspath(".")
    )
    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekens = ScratchEnsemble("reektest", ensemble_glob)
    if not HAVE_ECL2DF:
        pytest.skip()

    callback_smry = reekens.apply(get_smry, column_keys="FOPT", time_index="yearly")
    direct_smry = reekens.get_smry(column_keys="FOPT", time_index="yearly")

    # Both routes must agree on the column sums.
    for column in ("FOPT", "REAL"):
        assert callback_smry[column].sum() == direct_smry[column].sum()
def read_well_connection_status(
    ensemble_path: str, well_connection_status_file: str
) -> Optional[pd.DataFrame]:
    """Read csv files with well connection status data from the scratch disk.

    Merges together files from all realizations, does some fixing of the
    column data types, and returns it as a pandas dataframe.

    fmu-ensemble is used to find the file names on the scratch disk.

    The well connection status data is extracted from the CPI data, which is
    0 if the connection is SHUT and >0 if the connection is OPEN. This is
    independent of the status of the well.

    Args:
        ensemble_path: Path (pattern) of the ensemble on the scratch disk.
        well_connection_status_file: File to look for in each realization.

    Returns:
        The concatenated dataframe with an added ``REAL`` column, numeric
        ``I``, ``J`` and ``K1`` columns (``K`` is replaced by ``K1``) and
        ``DATE`` converted to ``datetime.date``; ``None`` if no files are found.
    """
    ens = ScratchEnsemble("ens", ensemble_path)
    df_files = ens.find_files(well_connection_status_file)

    if df_files.empty:
        return None

    # Read every realization first and concatenate once. Growing a frame with
    # pd.concat inside the loop copies all accumulated rows on each iteration.
    frames = []
    for _, row in df_files.iterrows():
        df_real = pd.read_csv(row.FULLPATH)
        df_real["REAL"] = row.REAL
        frames.append(df_real)
    df = pd.concat(frames)

    # Bracket access is used for the column updates; attribute-style
    # assignment only works for columns that already exist.
    df["I"] = pd.to_numeric(df["I"])
    df["J"] = pd.to_numeric(df["J"])
    df["K1"] = pd.to_numeric(df["K"])
    df = df.drop(["K"], axis=1)
    df["DATE"] = pd.to_datetime(df["DATE"]).dt.date
    return df
def test_get_df_merge():
    """Exercise the ``merge`` argument of ``get_df()`` on a virtual ensemble.

    Column counts of merged frames are compared with the column counts of the
    individual frames, and a mocked FIPNUM-to-zone table is merged in.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", ensemble_glob)

    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_scalar("npv.txt")
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")
    outputs = reekensemble.load_txt("outputs.txt")
    vens = reekensemble.to_virtual()

    params = vens.get_df("parameters.txt")
    smrycount = len(vens.get_df("unsmry--yearly").columns)

    # One column is subtracted since REAL is present in both tables.
    smryparams = vens.get_df("unsmry--yearly", merge="parameters")
    assert len(smryparams.columns) == len(params.columns) + smrycount - 1

    paramsoutputs = vens.get_df("parameters", merge=["outputs"])
    assert len(paramsoutputs.columns) == len(params.columns) + len(outputs.columns) - 1

    smry_params_outputs = vens.get_df(
        "unsmry--yearly", merge=["parameters", "outputs"]
    )
    assert (
        len(smry_params_outputs.columns)
        == smrycount + len(params.columns) + len(outputs.columns) - 2
    )

    params_npv = vens.get_df("parameters", merge="npv.txt")
    assert len(params_npv.columns) == len(params.columns) + 1

    # Symmetry: merging the other way around gives the same column count.
    npv_params = vens.get_df("npv.txt", merge="parameters.txt")
    assert len(npv_params.columns) == len(params.columns) + 1

    # Merge with zone data; a mocked dataframe is injected for this purpose.
    zone_names = ["UpperReek", "MidReek", "LowerReek"] * 2
    vens.data["fipnum2zone"] = pd.DataFrame(
        columns=["FIPNUM", "ZONE"],
        data=[[fipnum, zone] for fipnum, zone in enumerate(zone_names, start=1)],
    )
    volframe = vens.get_df("simulator_volume_fipnum", merge="fipnum2zone")
    for column in ("ZONE", "FIPNUM", "STOIIP_OIL"):
        assert column in volframe
    assert len(volframe["ZONE"].unique()) == 3
def test_noparameters(tmpdir):
    """Check ensemble set behaviour after parameters.txt has been removed.

    Two ensembles are set up through symlinks in a temporary directory. After
    removing ``parameters.txt`` the aggregated parameters are expected to be
    empty, and explicitly asking for them is expected to raise ``KeyError``.
    """
    testdir = os.path.dirname(os.path.abspath(__file__))
    ensdir = os.path.join(testdir, "data/testensemble-reek001/")

    tmpdir.chdir()
    iter_names = ("iter-0", "iter-1")
    for iter_name in iter_names:
        symlink_iter(ensdir, iter_name)

    ensembles = [
        ScratchEnsemble(iter_name, str(tmpdir.join("realization-*/" + iter_name)))
        for iter_name in iter_names
    ]
    ensset = EnsembleSet("reek001", ensembles)
    assert not ensset.parameters.empty

    # Remove the parameters from each realization:
    ensset.remove_data("parameters.txt")
    assert ensset.parameters.empty

    # Explicitly asking for parameters.txt should raise an exception.
    with pytest.raises(KeyError):
        ensset.get_df("parameters.txt")

    ensset.load_smry(time_index="yearly", column_keys="FOPT")
    assert not ensset.get_df("unsmry--yearly").empty
    # Merging with the removed parameters should fail in the same way.
    with pytest.raises(KeyError):
        ensset.get_df("unsmry--yearly", merge="parameters.txt")
def test_volumetric_rates():
    """Test computation of rates that are compatible with cumulative vectors.

    For every realization, the yearly volumetric FOPR values are expected to
    sum up to the last cumulative FOPT value.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    ens = ScratchEnsemble("reektest", ensemble_glob)

    column_keys = ["F*T", "W*T*"]
    cum_df = ens.get_smry(column_keys=column_keys, time_index="yearly")
    vol_rate_df = ens.get_volumetric_rates(column_keys=column_keys, time_index="yearly")

    assert "DATE" in vol_rate_df
    assert "FWCR" not in vol_rate_df
    for rate_vector in ("FOPR", "FWPR"):
        assert rate_vector in vol_rate_df

    # Test each realization individually
    rates_by_real = vol_rate_df.set_index("REAL")
    cum_by_real = cum_df.set_index("REAL")
    for realidx in vol_rate_df["REAL"].unique():
        real_rates = rates_by_real.loc[realidx]
        real_cum = cum_by_real.loc[realidx]
        assert len(real_rates) == 5
        assert real_rates["FOPR"].sum() == real_cum["FOPT"].iloc[-1]
def scratch_ensemble(
    ensemble_name: str, ensemble_path: Path, filter_file: Union[str, None] = "OK"
) -> ScratchEnsemble:
    """Create a ScratchEnsemble, optionally filtered on a file.

    Args:
        ensemble_name: Name given to the ensemble.
        ensemble_path: Path (pattern) to the realizations.
        filter_file: Passed to ``ScratchEnsemble.filter()``; use ``None`` to
            skip filtering.

    Returns:
        The ensemble, or the result of ``filter(filter_file)`` on it.
    """
    ensemble = ScratchEnsemble(ensemble_name, ensemble_path)
    if filter_file is None:
        return ensemble
    return ensemble.filter(filter_file)
def test_pred_dir():
    """Test import of a stripped 5 realization ensemble,
    manually doubled to two identical ensembles,
    plus a prediction ensemble.

    The extra ensembles are symlinks to iter-0 created inside the test data
    directory. They are removed again when the test is done, also when an
    assertion fails.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensdir = os.path.join(testdir, "data/testensemble-reek001/")
    linknames = ("iter-1", "pred-dg3")

    def remove_links():
        """Remove the temporary symlinks from every realization directory."""
        for realizationdir in glob.glob(ensdir + "/realization-*"):
            for linkname in linknames:
                linkpath = realizationdir + "/" + linkname
                # lexists() is also true for dangling symlinks, which
                # exists() reports as missing.
                if os.path.lexists(linkpath):
                    os.remove(linkpath)

    # Delete leftovers in case they exist from an earlier run.
    remove_links()
    try:
        # Link iter-0 to iter-1 and pred-dg3, creating identical ensembles
        # we can load for testing.
        for realizationdir in glob.glob(ensdir + "/realization-*"):
            for linkname in linknames:
                os.symlink(realizationdir + "/iter-0", realizationdir + "/" + linkname)

        # Initialize differently, using only the root path containing
        # realization-*. The frompath argument does not support
        # anything but iter-* naming convention for ensembles (yet?)
        ensset = EnsembleSet("foo", frompath=ensdir)
        assert len(ensset) == 2
        assert isinstance(ensset["iter-0"], ScratchEnsemble)
        assert isinstance(ensset["iter-1"], ScratchEnsemble)

        # We need to be more explicit to include the pred-dg3 directory:
        pred_ens = ScratchEnsemble("pred-dg3", ensdir + "realization-*/pred-dg3")
        ensset.add_ensemble(pred_ens)
        assert isinstance(ensset["pred-dg3"], ScratchEnsemble)
        assert len(ensset) == 3

        # Check the flagging in aggregated data:
        yearlysum = ensset.load_smry(time_index="yearly")
        assert "ENSEMBLE" in yearlysum.columns

        ens_list = list(yearlysum["ENSEMBLE"].unique())
        assert len(ens_list) == 3
        assert "pred-dg3" in ens_list
        assert "iter-0" in ens_list
        assert "iter-1" in ens_list

        # Try to add a new ensemble with a similar name to an existing:
        foo_ens = ScratchEnsemble("pred-dg3", ensdir + "realization-*/iter-1")
        with pytest.raises(ValueError):
            ensset.add_ensemble(foo_ens)
        assert len(ensset) == 3
    finally:
        # Delete the symlinks when we are done, whether the test passed or not.
        remove_links()
def load_ensemble_set(self) -> EnsembleSet:
    """Build an EnsembleSet from ``self.ensemble_paths``.

    One ScratchEnsemble is created per (name, path) item. Each of them is
    filtered on ``self.filter_file`` unless that attribute is ``None``.

    Returns:
        An EnsembleSet named ``self.ensemble_set_name``.
    """
    ensembles = []
    for ens_name, ens_path in self.ensemble_paths.items():
        ensemble = ScratchEnsemble(ens_name, ens_path)
        if self.filter_file is not None:
            ensemble = ensemble.filter(self.filter_file)
        ensembles.append(ensemble)
    return EnsembleSet(self.ensemble_set_name, ensembles)
def test_read_eclgrid():
    """Read Eclipse grid data for the whole reek test ensemble.

    Checks the number of columns and rows of the returned dataframe.
    """
    testdir = os.path.dirname(os.path.abspath(__file__))
    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", ensemble_glob)

    properties = ["PERMX", "FLOWATI+", "FLOWATJ+"]
    grid_df = reekensemble.get_eclgrid(properties, report=1)

    assert len(grid_df.columns) == 35
    assert len(grid_df["i"]) == 35840
def load_ensemble(self) -> ScratchEnsemble:
    """Create the ScratchEnsemble described by this object.

    The ensemble is filtered on ``self.filter_file`` unless that attribute
    is ``None``.

    Returns:
        The (possibly filtered) ensemble.

    Raises:
        ValueError: If the ensemble ends up without any realizations.
    """
    ensemble = ScratchEnsemble(self.ensemble_name, self.ensemble_path)
    if self.filter_file is not None:
        ensemble = ensemble.filter(self.filter_file)

    if ensemble.realizations == {}:
        message = (
            f"No realizations found for ensemble {self.ensemble_name}, "
            f"located at '{self.ensemble_path}'. "
            "Aborting..."
        )
        raise ValueError(message)
    return ensemble
def load_ensemble_set(
    ensemble_paths: dict,
    ensemble_set_name: str = "EnsembleSet",
    filter_file: Union[str, None] = "OK",
):
    """Build an EnsembleSet from a mapping of ensemble names to paths.

    Args:
        ensemble_paths: Mapping from ensemble name to ensemble path.
        ensemble_set_name: Name given to the resulting EnsembleSet.
        filter_file: Passed to ``ScratchEnsemble.filter()`` for every
            ensemble; use ``None`` to skip filtering.

    Returns:
        An EnsembleSet holding one ScratchEnsemble per item in the mapping.
    """
    ensembles = []
    for ens_name, ens_path in ensemble_paths.items():
        ensemble = ScratchEnsemble(ens_name, ens_path)
        if filter_file is not None:
            ensemble = ensemble.filter(filter_file)
        ensembles.append(ensemble)
    return EnsembleSet(ensemble_set_name, ensembles)
def test_nonexisting():
    """Check initialization from filesystem paths that cannot be read.

    Both a path that does not exist and a path where permission may be denied
    are expected to give an empty ensemble without crashing.
    """
    missing = ScratchEnsemble("nothing", "/foo/bar/com/not_existing")
    assert not missing

    # This ensemble does not exist either, but we should ensure no crash
    # when Permission Denied is encountered on /scratch/johan_sverdrup
    noaccess_glob = "/scratch/johan_sverdrup/js_phase5/" + "foo/realization-*/iter-0"
    denied = ScratchEnsemble("noaccess", noaccess_glob)
    assert not denied
def test_ertrunpathfile():
    """Initialize an ensemble from an ERT runpath file.

    The test may change the working directory; the original directory is
    restored afterwards, also when the test fails or is skipped.
    """
    cwd = os.getcwd()

    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    try:
        # The example runpathfile contains relative paths, which is not
        # realistic for real runpathfiles coming from ERT. But relative paths
        # are more easily handled in git and with pytest, so we have to try
        # some magic to get it to work:
        if "tests" not in os.getcwd():
            if os.path.exists("tests"):
                os.chdir("tests")
            else:
                pytest.skip("Did not find test data")
        if not os.path.exists("data"):
            pytest.skip("Did not find test data")

        # The ertrunpathfile used here assumes we are in the 'tests' directory
        ens = ScratchEnsemble(
            "ensfromrunpath", runpathfile=testdir + "/data/ert-runpath-file"
        )
        assert len(ens) == 5

        assert all(os.path.isabs(x) for x in ens.files["FULLPATH"])
        # Check that the UNSMRY files have been discovered, they should always
        # be because ECLBASE is given in the runpathfile
        assert sum("UNSMRY" in x for x in ens.files["BASENAME"].unique()) == 5
    finally:
        # Always restore the working directory so other tests are unaffected.
        os.chdir(cwd)
def test_ens_mismatch():
    """Test calculation of mismatch between observations and ensemble data.

    Checks the columns and length of the mismatch frame, the ranking of
    realizations on L2, and that unknown vectors give an empty result.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/"
    ens = ScratchEnsemble("test", ensemble_glob)

    obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})
    mismatch = obs.mismatch(ens)

    for column in ("L1", "L2", "MISMATCH", "OBSKEY", "OBSTYPE", "REAL"):
        assert column in mismatch.columns
    assert len(mismatch) == len(ens) * 1  # number of observation units.

    # Rank the realizations on L2, smallest mismatch first.
    fopt_rank = mismatch.sort_values("L2", ascending=True)["REAL"].values
    assert fopt_rank[0] == 2  # closest realization
    assert fopt_rank[-1] == 1  # worst realization

    # Try again with reference to non-existing vectors:
    fluff_obs = Observations(
        {"smryh": [{"key": "FOPTFLUFF", "histvec": "FOPTFLUFFH"}]}
    )
    assert fluff_obs.mismatch(ens).empty
def test_yaml():
    """Test loading batch commands from yaml files.

    The ensemble path in the yaml configuration is relative to the test
    directory, so the test runs from there. The previous working directory
    is restored afterwards.
    """
    # This is subject to change
    yamlstr = """
scratch_ensembles:
  iter1: data/testensemble-reek001/realization-*/iter-0
batch:
  - load_scalar:
      localpath: npv.txt
  - load_smry:
      column_keys: FOPT
      time_index: yearly
  - load_smry:
      column_keys: "*"
      time_index: daily"""
    ymlconfig = yaml.safe_load(yamlstr)

    testdir = os.path.dirname(os.path.abspath(__file__))
    cwd = os.getcwd()
    os.chdir(testdir)
    try:
        ensset = EnsembleSet()
        for ensname, enspath in ymlconfig["scratch_ensembles"].items():
            ensset.add_ensemble(ScratchEnsemble(ensname, paths=enspath))
        ensset.process_batch(ymlconfig["batch"])

        assert "parameters.txt" in ensset.keys()
        assert "OK" in ensset.keys()
        assert "npv.txt" in ensset.keys()
        assert not ensset.get_df("unsmry--yearly").empty
    finally:
        # Do not leak the changed working directory into other tests.
        os.chdir(cwd)
def _dump_smry_to_csv_using_fmu(
    ens_path: str, time_index: str, output_csv_file: str
) -> None:
    """Load summary data with fmu-ensemble and write it to a csv file.

    The data is sorted on ``DATE`` then ``REAL``. The shape of the frame and
    the unique dates and realizations are printed before writing.

    Args:
        ens_path: Path (pattern) handed to ``ScratchEnsemble`` as ``paths``.
        time_index: Time index passed on to ``load_smry()``.
        output_csv_file: Destination csv file, written without the index.
    """
    ensemble = ScratchEnsemble("tempEnsName", paths=ens_path)
    smry_df = ensemble.load_smry(time_index=time_index)
    smry_df.sort_values(["DATE", "REAL"], inplace=True)

    print("Dataframe shape::", smry_df.shape)

    # Report the unique dates first, then the unique realizations.
    for label, column in (("dates", "DATE"), ("reals", "REAL")):
        unique_values = smry_df[column].unique()
        print(f"Num unique {label}:", len(unique_values))
        print(unique_values)

    smry_df.to_csv(output_csv_file, index=False)
def load_per_real_csv_file_using_fmu(
    ens_path: str, csv_file_rel_path: str
) -> pd.DataFrame:
    """Load a per-realization csv file for a whole ensemble via fmu-ensemble.

    Start and elapsed time are written to the debug log.

    Args:
        ens_path: Path (pattern) of the ensemble.
        csv_file_rel_path: Path of the csv file, handed to ``load_csv()``.

    Returns:
        The dataframe returned by ``ScratchEnsemble.load_csv()``.
    """
    LOGGER.debug(f"load_per_real_csv_file_using_fmu() starting - {csv_file_rel_path}")
    timer = PerfTimer()

    ensemble = ScratchEnsemble("tempEnsName", ens_path, autodiscovery=True)
    csv_df = ensemble.load_csv(csv_file_rel_path)

    LOGGER.debug(
        f"load_per_real_csv_file_using_fmu() finished in: {timer.elapsed_s():.2f}s"
    )
    return csv_df
def test_read_eclgrid():
    """Read Eclipse grid data from an ensemble on the scratch disk.

    This is a cpu-intensive test. It is skipped when the ensemble directory
    does not exist.
    """
    if not os.path.exists("/scratch/fmu/akia/3_r001_reek/realization-1"):
        pytest.skip("Only works on Stavanger Linux")

    ensemble_path = "/scratch/fmu/akia/3_r001_reek/realization-*1/iter-0"
    reekensemble = ScratchEnsemble("ensemblename", ensemble_path)

    properties = ["PERMX", "FLOWATI+", "FLOWATJ+"]
    grid_df = reekensemble.get_eclgrid(properties, report=4)

    assert len(grid_df.columns) == 35
    assert len(grid_df["i"]) == 35840
def test_volumetric_rates():
    """Test volumetric rates computed on a virtual ensemble.

    We only need to test the aggregation here: the result must be a dataframe
    with the REAL, DATE and FOPR columns and the expected number of rows.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", ensemble_glob)
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_scalar("npv.txt")

    vens = reekensemble.to_virtual()
    vol_rates = vens.get_volumetric_rates(column_keys="FOPT", time_index="yearly")

    assert isinstance(vol_rates, pd.DataFrame)
    for column in ("REAL", "DATE", "FOPR"):
        assert column in vol_rates
    assert len(vol_rates) == 25
def test_manual_aggregation():
    """Compare ``agg("mean")`` with a mean built from the realizations.

    Adding up the five realizations and scaling by 1/5 should give the same
    RMS_SEED parameter as calling ``agg("mean")`` on the ensemble.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", ensemble_glob)
    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    reekensemble.load_csv("share/results/volumes/simulator_volume_fipnum.csv")

    # Aggregate the ensemble into a virtual "mean" realization
    mean = reekensemble.agg("mean")

    # Add up the ensemble members one by one, then scale the sum to obtain
    # the mean. The result is combined from the realizations directly.
    total = reekensemble[0]
    for realidx in range(1, 5):
        total = total + reekensemble[realidx]
    manualmean = 1 / 5 * total

    # Both ways of computing the mean must agree:
    expected_seed = mean["parameters"]["RMS_SEED"]
    assert expected_seed == manualmean["parameters"]["RMS_SEED"]
def _discover_ensemble_realizations_fmu(ens_path: str) -> Dict[int, str]:
    """Return the runpath of each realization, keyed by realization number.

    The ensemble found at ``ens_path`` is filtered on ``"OK"`` before the
    runpaths are collected.
    """
    ok_ensemble = ScratchEnsemble("dummyEnsembleName", paths=ens_path).filter("OK")
    return {
        real_no: realization.runpath()
        for real_no, realization in ok_ensemble.realizations.items()
    }
def load_ensemble_set(ensemble_paths: tuple, ensemble_set_name: str = "EnsembleSet"):
    """Build an EnsembleSet from (name, path) pairs.

    Args:
        ensemble_paths: Iterable of ``(ensemble name, ensemble path)`` pairs.
        ensemble_set_name: Name given to the resulting EnsembleSet.

    Returns:
        An EnsembleSet with one unfiltered ScratchEnsemble per pair.
    """
    ensembles = []
    for ens_name, ens_path in ensemble_paths:
        ensembles.append(ScratchEnsemble(ens_name, ens_path))
    return EnsembleSet(ensemble_set_name, ensembles)
def test_get_smry_meta(tmpdir):
    """Test the conservation of smry meta-data in virtual ensembles.

    The meta-data is checked both on the in-memory virtual ensemble and on a
    virtual ensemble that is reloaded after dumping to disk.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reekmetatest", ensemble_glob)

    # If no smry loaded before virtualization, nothing should be there:
    assert "__smry_metadata" not in reekensemble.to_virtual().keys()

    reekensemble.load_smry(time_index="yearly", column_keys=["F*"])
    origmeta = reekensemble.get_smry_meta()
    vens = reekensemble.to_virtual()
    assert "__smry_metadata" in vens.keys()

    # Internally the meta-data is stored as a DataFrame. This is checked
    # since it is reachable through get_df(), and thereby almost part of
    # the API.
    assert isinstance(vens.get_df("__smry_metadata"), pd.DataFrame)

    def check_metadict(metadict):
        """Compare a meta-data dict with the smry columns and original meta."""
        assert isinstance(metadict, dict)
        # NOTE(review): the column counts are always taken from the in-memory
        # vens, also when metadict comes from the reloaded ensemble.
        assert len(metadict) + 2 == len(
            vens.get_smry(time_index="yearly", column_keys="*").columns
        )
        # (the vens only knows of F* columns)
        assert len(metadict) + 2 == len(
            vens.get_smry(time_index="yearly", column_keys="F*").columns
        )
        assert origmeta["FOPT"] == metadict["FOPT"]
        assert origmeta["FWPTH"] == metadict["FWPTH"]

    # Users should rather use get_smry_meta() to obtain stuff from the
    # internal frame __smry_metadata:
    check_metadict(vens.get_smry_meta())

    assert not vens.get_smry_meta([])
    assert vens.get_smry_meta(column_keys="FOPT")["FOPT"] == origmeta["FOPT"]
    assert not vens.get_smry_meta(column_keys="WOPT:NOTEXISTING")

    # Test that it is retrievable after dumping to disk:
    vens_disk_path = str(tmpdir.join("vens_dumped"))
    vens.to_disk(vens_disk_path)
    disk_vens = VirtualEnsemble(fromdisk=vens_disk_path)
    check_metadict(disk_vens.get_smry_meta())
def test_apply(tmpdir):
    """Test the callback functionality of ``ScratchEnsemble.apply()``.

    A callback returning a constant dataframe is applied and internalized.
    Unless ``SKIP_FMU_TOOLS`` is set, a callback wrapping the volumetrics
    parser in fmu.tools is applied as well.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    tmpdir.chdir()
    symlink_iter(os.path.join(testdir, "data/testensemble-reek001"), "iter-0")
    ens = ScratchEnsemble("reektest", "realization-*/iter-0")

    def ex_func1():
        """Example function that will return a constant dataframe"""
        return pd.DataFrame(
            index=["1", "2"], columns=["foo", "bar"], data=[[1, 2], [3, 4]]
        )

    applied_df = ens.apply(ex_func1)
    assert isinstance(applied_df, pd.DataFrame)
    assert "REAL" in applied_df.columns
    assert len(applied_df) == 10

    # Check that we can internalize as well
    ens.apply(ex_func1, localpath="df-1234")
    internalized_df = ens.get_df("df-1234")
    assert "REAL" in internalized_df
    assert len(internalized_df) == len(applied_df)

    if SKIP_FMU_TOOLS:
        return

    # Test if we can wrap the volumetrics-parser in fmu.tools:
    # It cannot be applied directly, as we need to combine the
    # realization's root directory with the relative path coming in:
    def rms_vol2df(kwargs):
        """Example function for bridging with fmu.tools to parse volumetrics"""
        fullpath = os.path.join(kwargs["realization"].runpath(), kwargs["filename"])
        # The supplied callback should not fail too easily.
        if not os.path.exists(fullpath):
            return pd.DataFrame()
        return volumetrics.rmsvolumetrics_txt2df(fullpath)

    volumes_file = "share/results/volumes/" + "geogrid_vol_oil_1.txt"
    rmsvols_df = ens.apply(rms_vol2df, filename=volumes_file)
    assert rmsvols_df["STOIIP_OIL"].sum() > 0
    assert len(rmsvols_df["REAL"].unique()) == 4
def test_filedescriptors():
    """Test how filedescriptors are used.

    The lazy_load option to EclSum affects this, if it is set to True
    file descriptors are not closed (and True is the default).
    In order to be able to open thousands of smry files, we need
    to always close the file descriptors when possible, and therefore
    lazy_load should be set to False in realization.py

    The number of open file descriptors of this process is counted before
    the ensemble is created and after it is deleted. Nothing is tested when
    the /proc file descriptor directory is not available.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    fd_dir = "/proc/{}/fd".format(os.getpid())
    if not os.path.exists(fd_dir):
        print("Counting file descriptors on non-Linux not supported")
        return

    def count_open_fds():
        """Number of entries in the file descriptor directory."""
        return len(os.listdir(fd_dir))

    fds_before = count_open_fds()
    ensemble_glob = testdir + "/data/testensemble-reek001/" + "realization-*/iter-0"
    reekensemble = ScratchEnsemble("reektest", ensemble_glob)
    reekensemble.load_smry()
    del reekensemble
    fds_after = count_open_fds()

    # Counting before the ensemble, after creating it, after load_smry() and
    # after deletion: as long as lazy_load = False we should have 5,5,5,5.
    # If lazy_load is True (default), then we get 15, 15, 25, 20
    # (that last number pattern reveals a (now fixed) bug in EclSum)
    assert fds_before == fds_after
def _get_or_create_scratch_ensemble(
    self, ens_name: str, ens_path: str
) -> ScratchEnsemble:
    """Return a ScratchEnsemble for the given name and path, using the cache.

    A new ensemble is created and filtered on ``"OK"`` the first time a
    name/path combination is seen; later calls unpickle the cached copy.

    Note that we cache a pickled version of ScratchEnsembles to avoid memory
    bloating as data is loaded and internalized by the ensemble.
    """
    cache = self._scratch_ensemble_cache
    cache_key = json.dumps({"ens_name": ens_name, "ens_path": ens_path})

    if cache_key not in cache:
        ensemble = ScratchEnsemble(ens_name, ens_path).filter("OK")
        cache[cache_key] = pickle.dumps(ensemble, pickle.HIGHEST_PROTOCOL)
        return ensemble

    # Only data pickled by this method ends up in the cache.
    return pickle.loads(cache[cache_key])  # nosec
def test_ens_failedreals():
    """Ensure we can calculate mismatch where some realizations
    do not have UNSMRY data.

    The ensembles are created with autodiscovery turned off, so UNSMRY files
    have to be located explicitly through ``find_files()``.
    """
    if "__file__" in globals():
        # Makes it easier to paste the test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")

    def fresh_ensemble():
        """Create the test ensemble without autodiscovery of files."""
        return ScratchEnsemble(
            "test",
            testdir + "/data/testensemble-reek001/" + "realization-*/iter-0/",
            autodiscovery=False,
        )

    obs = Observations({"smryh": [{"key": "FOPT", "histvec": "FOPTH"}]})

    ens = fresh_ensemble()
    # There are no UNSMRY found, so the mismatch should be empty:
    assert obs.mismatch(ens).empty

    ens.find_files("eclipse/model/*UNSMRY")
    assert not obs.mismatch(ens).empty

    # Reinitialize
    ens = fresh_ensemble()
    ens.find_files("eclipse/model/*UNSMRY")

    # Redirect the UNSMRY pointer in realization 3 so it isn't found
    real3files = ens[3].files
    is_unsmry = real3files["FILETYPE"] == "UNSMRY"
    real3files.loc[is_unsmry, "FULLPATH"] = "FOO"

    # Check that we only have EclSum for 2 and not for 3:
    assert ens[2].get_eclsum()
    assert not ens[3].get_eclsum()

    missingsmry = obs.mismatch(ens)
    # Realization 3 should NOT be present now
    assert 3 not in list(missingsmry["REAL"])
    assert not obs.mismatch(ens).empty