Exemplo n.º 1
0
def test_noparameters(tmpdir):
    """Check EnsembleSet behaviour once parameters.txt is removed.

    After deleting parameters.txt from every realization, the aggregated
    parameters frame must be empty, and any explicit request for it
    (directly or via merge) must raise KeyError.
    """
    test_dir = os.path.dirname(os.path.abspath(__file__))
    ens_path = os.path.join(test_dir, "data/testensemble-reek001/")

    tmpdir.chdir()
    for iteration in ("iter-0", "iter-1"):
        symlink_iter(ens_path, iteration)

    ensembles = [
        ScratchEnsemble(name, str(tmpdir.join("realization-*/" + name)))
        for name in ("iter-0", "iter-1")
    ]

    ensset = EnsembleSet("reek001", ensembles)
    assert not ensset.parameters.empty

    # Remove parameters.txt from each realization:
    ensset.remove_data("parameters.txt")
    assert ensset.parameters.empty

    # However, when parameters.txt is explicitly asked for,
    # an exception should be raised:
    with pytest.raises(KeyError):
        ensset.get_df("parameters.txt")

    ensset.load_smry(time_index="yearly", column_keys="FOPT")
    assert not ensset.get_df("unsmry--yearly").empty
    with pytest.raises(KeyError):
        ensset.get_df("unsmry--yearly", merge="parameters.txt")
Exemplo n.º 2
0
def test_mangling_data(tmpdir):
    """Import a stripped 5-realization ensemble duplicated into two
    iterations, where iter-1 deliberately lacks parameters.txt and
    outputs.txt, and verify that aggregation copes with the holes."""

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        test_dir = os.path.dirname(os.path.abspath(__file__))
    else:
        test_dir = os.path.abspath(".")
    source_ens = os.path.join(test_dir, "data/testensemble-reek001/")

    tmpdir.chdir()

    symlink_iter(source_ens, "iter-0")

    # Build iter-1 from iter-0, symlinking each file/dir individually
    # but leaving out parameters.txt and outputs.txt so the two
    # iterations differ in available data.
    skipped = ("parameters.txt", "outputs.txt")
    for real_dir in glob.glob("realization-*"):
        os.mkdir(real_dir + "/iter-1")
        for component in glob.glob(real_dir + "/iter-0/*"):
            if all(skip not in component for skip in skipped):
                os.symlink(component, component.replace("iter-0", "iter-1"))

    # Trigger warnings:
    assert not EnsembleSet()  # warning given
    assert not EnsembleSet(["bargh"])  # warning given
    assert not EnsembleSet("bar")  # No warning, just empty
    EnsembleSet("foobar", frompath="foobarth")  # trigger warning

    ensset = EnsembleSet("foo", frompath=".")
    assert len(ensset) == 2
    for itername in ("iter-0", "iter-1"):
        assert isinstance(ensset[itername], ScratchEnsemble)

    assert "parameters.txt" in ensset.keys()

    # Parameters should only be present in iter-0:
    params = ensset.get_df("parameters.txt")
    assert len(params) == 5
    assert params["ENSEMBLE"].unique() == "iter-0"

    ensset.load_txt("outputs.txt")
    assert "outputs.txt" in ensset.keys()
    assert len(ensset.get_df("outputs.txt")) == 4

    # Asking for data that exists in none of the ensembles must error:
    with pytest.raises((KeyError, ValueError)):
        ensset.get_df("foobar")
Exemplo n.º 3
0
def test_yaml():
    """Run batch commands defined in a yaml string against an EnsembleSet"""

    # The batch command format is subject to change

    yamlstr = """
scratch_ensembles:
  iter1: data/testensemble-reek001/realization-*/iter-0
batch:
  - load_scalar:
      localpath: npv.txt
  - load_smry:
      column_keys: FOPT
      time_index: yearly
  - load_smry:
      column_keys: "*"
      time_index: daily"""
    config = yaml.safe_load(yamlstr)

    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    ensset = EnsembleSet()

    for ens_name, ens_path in config["scratch_ensembles"].items():
        ensset.add_ensemble(ScratchEnsemble(ens_name, paths=ens_path))
    ensset.process_batch(config["batch"])

    for expected_key in ("parameters.txt", "OK", "npv.txt"):
        assert expected_key in ensset.keys()
    assert not ensset.get_df("unsmry--yearly").empty
Exemplo n.º 4
0
def test_ensembleset_reek001(tmp="TMP"):
    """Test import of a stripped 5 realization ensemble,
    manually doubled to two identical ensembles

    Exercises EnsembleSet construction (from lists, empty, from path),
    aggregation of parameters/txt/smry/csv/scalar data, cached
    retrieval, error handling, and the apply() callback mechanism.

    Args:
        tmp (str): relative directory used for dumping CSV output;
            created in the current working directory if missing.
    """
    # NOTE(review): this test mutates the shared on-disk test data
    # (creates iter-1 symlinks inside ensdir) and cleans them up at the
    # end — statement order matters throughout.

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    ensdir = os.path.join(testdir, "data/testensemble-reek001/")

    # Copy iter-0 to iter-1, creating an identical ensemble
    # we can load for testing.
    for realizationdir in glob.glob(ensdir + "/realization-*"):
        # Delete any leftover iter-1 from a previous (aborted) run first
        if os.path.exists(realizationdir + "/iter-1"):
            if os.path.islink(realizationdir + "/iter-1"):
                os.remove(realizationdir + "/iter-1")
            else:
                shutil.rmtree(realizationdir + "/iter-1")
        os.symlink(realizationdir + "/iter-0", realizationdir + "/iter-1")

    iter0 = ScratchEnsemble("iter-0", ensdir + "/realization-*/iter-0")
    iter1 = ScratchEnsemble("iter-1", ensdir + "/realization-*/iter-1")

    ensset = EnsembleSet("reek001", [iter0, iter1])
    assert len(ensset) == 2
    # 250 STATUS rows per iteration: presumably 50 rows x 5 realizations
    assert len(ensset["iter-0"].get_df("STATUS")) == 250
    assert len(ensset["iter-1"].get_df("STATUS")) == 250

    # Try adding the same object over again
    try:
        ensset.add_ensemble(iter0)
    except ValueError:
        pass
    assert len(ensset) == 2  # Unchanged!

    # Initializing nothing, we get warning about the missing name
    noname = EnsembleSet()
    assert noname.name  # not None
    assert isinstance(noname.name, str)  # And it should be a string

    # Initialize starting from empty ensemble
    ensset2 = EnsembleSet("reek001", [])
    assert ensset2.name == "reek001"
    ensset2.add_ensemble(iter0)
    ensset2.add_ensemble(iter1)
    assert len(ensset2) == 2

    # Check that we can skip the empty list:
    ensset2x = EnsembleSet("reek001")
    ensset2x.add_ensemble(iter0)
    ensset2x.add_ensemble(iter1)
    assert len(ensset2x) == 2

    # Initialize directly from path with globbing:
    ensset3 = EnsembleSet("reek001direct", [])
    assert ensset3.name == "reek001direct"
    ensset3.add_ensembles_frompath(ensdir)
    assert len(ensset3) == 2

    # Alternative globbing:
    ensset4 = EnsembleSet("reek001direct2", frompath=ensdir)
    assert len(ensset4) == 2

    # Testing aggregation of parameters
    paramsdf = ensset3.parameters
    if not os.path.exists(tmp):
        os.mkdir(tmp)
    paramsdf.to_csv(os.path.join(tmp, "enssetparams.csv"), index=False)
    assert isinstance(paramsdf, pd.DataFrame)
    # 10 rows = 5 realizations x 2 iterations
    assert len(ensset3.parameters) == 10
    assert len(ensset3.parameters.columns) == 27
    assert "ENSEMBLE" in ensset3.parameters.columns
    assert "REAL" in ensset3.parameters.columns

    outputs = ensset3.load_txt("outputs.txt")
    assert "NPV" in outputs.columns

    # Test Eclipse summary handling:
    assert len(ensset3.get_smry_dates(freq="report")) == 641
    assert len(ensset3.get_smry_dates(freq="monthly")) == 37
    assert len(ensset3.load_smry(column_keys=["FOPT"],
                                 time_index="yearly")) == 50
    monthly = ensset3.load_smry(column_keys=["F*"], time_index="monthly")
    # Index-like columns must come first in the aggregated frame:
    assert monthly.columns[0] == "ENSEMBLE"
    assert monthly.columns[1] == "REAL"
    assert monthly.columns[2] == "DATE"

    # Eclipse well names
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames("WI*")) == 3
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames()) == 8

    # Check that we can retrieve cached versions
    assert len(ensset3.get_df("unsmry--monthly")) == 380
    assert len(ensset3.get_df("unsmry--yearly")) == 50
    monthly.to_csv(os.path.join(tmp, "ensset-monthly.csv"), index=False)

    # weekly data was never loaded, so this shorthand must fail:
    with pytest.raises(ValueError):
        ensset3.get_df("unsmry--weekly")

    # Check errors when we ask for stupid stuff
    with pytest.raises(ValueError):
        ensset3.load_csv("bogus.csv")
    with pytest.raises(ValueError):
        ensset3.get_df("bogus.csv")

    # Check get_smry()
    smry = ensset3.get_smry(time_index="yearly",
                            column_keys=["FWCT", "FGOR"],
                            end_date="2002-02-01")
    assert "ENSEMBLE" in smry
    assert "REAL" in smry
    assert len(smry["ENSEMBLE"].unique()) == 2
    assert len(smry["REAL"].unique()) == 5
    assert "FWCT" in smry
    assert "FGOR" in smry
    assert "DATE" in smry
    assert len(smry) == 40

    # Eclipse well names list
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames(None)) == 8
    assert len(ensset3.get_wellnames()) == 8
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames(["OP*", "WI*"])) == 8

    # Test aggregation of csv files:
    vol_df = ensset3.load_csv("share/results/volumes/" +
                              "simulator_volume_fipnum.csv")
    assert "REAL" in vol_df
    assert "ENSEMBLE" in vol_df
    # Only 3 of the 5 realizations carry this file:
    assert len(vol_df["REAL"].unique()) == 3
    assert len(vol_df["ENSEMBLE"].unique()) == 2
    assert len(ensset3.keys()) == 7

    # Test scalar imports:
    ensset3.load_scalar("npv.txt")
    npv = ensset3.get_df("npv.txt")
    assert "ENSEMBLE" in npv
    assert "REAL" in npv
    assert "npv.txt" in npv
    assert len(npv) == 10
    # Scalar import with forced numerics:
    # two realizations have non-numeric npv.txt, so forcing numeric
    # conversion drops them (10 -> 8 rows)
    ensset3.load_scalar("npv.txt", convert_numeric=True, force_reread=True)
    npv = ensset3.get_df("npv.txt")
    assert len(npv) == 8

    predel_len = len(ensset3.keys())
    ensset3.drop("parameters.txt")
    assert len(ensset3.keys()) == predel_len - 1

    # Test callback functionality, that we can convert rms
    # volumetrics in each realization. First we need a
    # wrapper which is able to work on ScratchRealizations.
    def rms_vol2df(kwargs):
        # Callback: parse an rms volumetrics txt file in one realization
        fullpath = os.path.join(kwargs["realization"].runpath(),
                                kwargs["filename"])
        # The supplied callback should not fail too easy.
        if os.path.exists(fullpath):
            return volumetrics.rmsvolumetrics_txt2df(fullpath)
        else:
            return pd.DataFrame()

    if not SKIP_FMU_TOOLS:
        rmsvols_df = ensset3.apply(rms_vol2df,
                                   filename="share/results/volumes/" +
                                   "geogrid_vol_oil_1.txt")
        assert rmsvols_df["STOIIP_OIL"].sum() > 0
        assert len(rmsvols_df["REAL"].unique()) == 4
        assert len(rmsvols_df["ENSEMBLE"].unique()) == 2

        # Test that we can dump to disk as well and load from csv:
        ensset3.apply(
            rms_vol2df,
            filename="share/results/volumes/" + "geogrid_vol_oil_1.txt",
            localpath="share/results/volumes/geogrid--oil.csv",
            dumptodisk=True,
        )
        geogrid_oil = ensset3.load_csv(
            "share/results/volumes/geogrid--oil.csv")
        assert len(geogrid_oil["REAL"].unique()) == 4
        assert len(geogrid_oil["ENSEMBLE"].unique()) == 2
        # Clean up what we just dumped:
        for real_dir in glob.glob(ensdir + "/realization-*"):
            csvfile = real_dir + "/iter-0/share/results/volumes/geogrid--oil.csv"
            if os.path.exists(csvfile):
                os.remove(csvfile)

    # Initialize differently, using only the root path containing
    # realization-*
    ensset4 = EnsembleSet("foo", frompath=ensdir)
    assert len(ensset4) == 2
    assert isinstance(ensset4["iter-0"], ScratchEnsemble)
    assert isinstance(ensset4["iter-1"], ScratchEnsemble)

    # Delete the symlink and leftover from apply-testing when we are done.
    for real_dir in glob.glob(ensdir + "/realization-*"):
        if not SKIP_FMU_TOOLS:
            csvfile = real_dir + "/iter-0/share/results/volumes/geogrid--oil.csv"
            if os.path.exists(csvfile):
                os.remove(csvfile)
        if os.path.exists(real_dir + "/iter-1"):
            os.remove(real_dir + "/iter-1")
Exemplo n.º 5
0
def test_ensembleset_reek001(tmpdir):
    """Test import of a stripped 5 realization ensemble,
    manually doubled to two identical ensembles

    Runs entirely inside tmpdir (via symlinks into the test data),
    exercising EnsembleSet construction, aggregation of
    parameters/txt/smry/csv/scalar data, merging in get_df(), cached
    retrieval, error handling, the apply() callback mechanism, and
    batch processing both at init time and after init.
    """

    if "__file__" in globals():
        # Easen up copying test code into interactive sessions
        testdir = os.path.dirname(os.path.abspath(__file__))
    else:
        testdir = os.path.abspath(".")
    ensdir = os.path.join(testdir, "data/testensemble-reek001/")

    tmpdir.chdir()
    symlink_iter(ensdir, "iter-0")
    symlink_iter(ensdir, "iter-1")

    iter0 = ScratchEnsemble("iter-0", str(tmpdir.join("realization-*/iter-0")))
    iter1 = ScratchEnsemble("iter-1", str(tmpdir.join("realization-*/iter-1")))

    ensset = EnsembleSet("reek001", [iter0, iter1])

    assert len(ensset) == 2
    # 250 STATUS rows per iteration (5 realizations)
    assert len(ensset["iter-0"].get_df("STATUS")) == 250
    assert len(ensset["iter-1"].get_df("STATUS")) == 250

    # Try adding the same object over again
    try:
        ensset.add_ensemble(iter0)
    except ValueError:
        pass
    assert len(ensset) == 2  # Unchanged!

    # Initializing nothing, we get warning about the missing name
    noname = EnsembleSet()
    assert noname.name  # not None
    assert isinstance(noname.name, str)  # And it should be a string

    # Initialize starting from empty ensemble
    ensset2 = EnsembleSet("reek001", [])
    assert ensset2.name == "reek001"
    ensset2.add_ensemble(iter0)
    ensset2.add_ensemble(iter1)
    assert len(ensset2) == 2

    # Check that we can skip the empty list:
    ensset2x = EnsembleSet("reek001")
    ensset2x.add_ensemble(iter0)
    ensset2x.add_ensemble(iter1)
    assert len(ensset2x) == 2

    # Initialize directly from path with globbing:
    ensset3 = EnsembleSet("reek001direct", [])
    assert ensset3.name == "reek001direct"
    ensset3.add_ensembles_frompath(".")
    assert len(ensset3) == 2

    # Alternative globbing:
    ensset4 = EnsembleSet("reek001direct2", frompath=".")
    assert len(ensset4) == 2

    # Testing aggregation of parameters
    paramsdf = ensset3.parameters
    paramsdf.to_csv("enssetparams.csv", index=False)
    assert isinstance(paramsdf, pd.DataFrame)
    # 10 rows = 5 realizations x 2 iterations
    assert len(ensset3.parameters) == 10
    assert len(ensset3.parameters.columns) == 27
    assert "ENSEMBLE" in ensset3.parameters.columns
    assert "REAL" in ensset3.parameters.columns

    outputs = ensset3.load_txt("outputs.txt")
    assert "NPV" in outputs.columns

    # Test Eclipse summary handling:
    assert len(ensset3.get_smry_dates(freq="report")) == 641
    assert len(ensset3.get_smry_dates(freq="monthly")) == 37
    assert len(ensset3.load_smry(column_keys=["FOPT"],
                                 time_index="yearly")) == 50
    monthly = ensset3.load_smry(column_keys=["F*"], time_index="monthly")
    # Index-like columns must come first in the aggregated frame:
    assert monthly.columns[0] == "ENSEMBLE"
    assert monthly.columns[1] == "REAL"
    assert monthly.columns[2] == "DATE"
    # time_index "raw" and None should be equivalent:
    raw = ensset3.load_smry(column_keys=["F*PT"], time_index="raw")
    assert ensset3.load_smry(column_keys=["F*PT"]).iloc[3, 4] == raw.iloc[3, 4]
    assert (ensset3.load_smry(column_keys=["F*PT"],
                              time_index=None).iloc[3, 5] == raw.iloc[3, 5])

    # Eclipse well names
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames("WI*")) == 3
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames()) == 8

    # Check that we can retrieve cached versions
    assert len(ensset3.get_df("unsmry--monthly")) == 380
    assert len(ensset3.get_df("unsmry--yearly")) == 50
    monthly.to_csv("ensset-monthly.csv", index=False)

    # Test merging in get_df()
    param_output = ensset3.get_df("parameters.txt", merge="outputs")
    assert "top_structure" in param_output
    assert "SORG1" in param_output

    smry_params = ensset3.get_df("unsmry--monthly", merge="parameters")
    assert "SORG1" in smry_params
    assert "FWCT" in smry_params

    # Merging with something that does not exist:
    with pytest.raises(KeyError):
        ensset3.get_df("unsmry--monthly", merge="barrFF")

    # weekly data was never loaded, so this shorthand must fail:
    with pytest.raises((KeyError, ValueError)):
        ensset3.get_df("unsmry--weekly")

    # Check errors when we ask for stupid stuff
    with pytest.raises((KeyError, ValueError)):
        ensset3.load_csv("bogus.csv")
    with pytest.raises((KeyError, ValueError)):
        ensset3.get_df("bogus.csv")

    # Check get_smry()
    smry = ensset3.get_smry(time_index="yearly",
                            column_keys=["FWCT", "FGOR"],
                            end_date="2002-02-01")
    assert "ENSEMBLE" in smry
    assert "REAL" in smry
    assert len(smry["ENSEMBLE"].unique()) == 2
    assert len(smry["REAL"].unique()) == 5
    assert "FWCT" in smry
    assert "FGOR" in smry
    assert "DATE" in smry
    assert len(smry) == 40

    # Eclipse well names list
    assert len(ensset3.get_wellnames("OP*")) == 5
    assert len(ensset3.get_wellnames(None)) == 8
    assert len(ensset3.get_wellnames()) == 8
    assert not ensset3.get_wellnames("")
    assert len(ensset3.get_wellnames(["OP*", "WI*"])) == 8

    # Test aggregation of csv files:
    vol_df = ensset3.load_csv("share/results/volumes/" +
                              "simulator_volume_fipnum.csv")
    assert "REAL" in vol_df
    assert "ENSEMBLE" in vol_df
    # Only 3 of the 5 realizations carry this file:
    assert len(vol_df["REAL"].unique()) == 3
    assert len(vol_df["ENSEMBLE"].unique()) == 2
    assert len(ensset3.keys()) == 8

    # Test scalar imports:
    ensset3.load_scalar("npv.txt")
    npv = ensset3.get_df("npv.txt")
    assert "ENSEMBLE" in npv
    assert "REAL" in npv
    assert "npv.txt" in npv
    assert len(npv) == 10
    # Scalar import with forced numerics:
    # non-numeric npv.txt files are dropped on conversion (10 -> 8 rows)
    ensset3.load_scalar("npv.txt", convert_numeric=True, force_reread=True)
    npv = ensset3.get_df("npv.txt")
    assert len(npv) == 8

    predel_len = len(ensset3.keys())
    ensset3.drop("parameters.txt")
    assert len(ensset3.keys()) == predel_len - 1

    # Test callback functionality, that we can convert rms
    # volumetrics in each realization. First we need a
    # wrapper which is able to work on ScratchRealizations.
    def rms_vol2df(kwargs):
        """Callback function to be sent to ensemble objects"""
        fullpath = os.path.join(kwargs["realization"].runpath(),
                                kwargs["filename"])
        # The supplied callback should not fail too easy.
        if os.path.exists(fullpath):
            return volumetrics.rmsvolumetrics_txt2df(fullpath)
        return pd.DataFrame()

    if not SKIP_FMU_TOOLS:
        rmsvols_df = ensset3.apply(rms_vol2df,
                                   filename="share/results/volumes/" +
                                   "geogrid_vol_oil_1.txt")
        assert rmsvols_df["STOIIP_OIL"].sum() > 0
        assert len(rmsvols_df["REAL"].unique()) == 4
        assert len(rmsvols_df["ENSEMBLE"].unique()) == 2

        # Test that we can dump to disk as well and load from csv:
        ensset3.apply(
            rms_vol2df,
            filename="share/results/volumes/" + "geogrid_vol_oil_1.txt",
            localpath="share/results/volumes/geogrid--oil.csv",
            dumptodisk=True,
        )
        geogrid_oil = ensset3.load_csv(
            "share/results/volumes/geogrid--oil.csv")
        assert len(geogrid_oil["REAL"].unique()) == 4
        assert len(geogrid_oil["ENSEMBLE"].unique()) == 2

    # Initialize differently, using only the root path containing
    # realization-*
    ensset4 = EnsembleSet("foo", frompath=".")
    assert len(ensset4) == 2
    assert isinstance(ensset4["iter-0"], ScratchEnsemble)
    assert isinstance(ensset4["iter-1"], ScratchEnsemble)

    # Try the batch command feature:
    ensset5 = EnsembleSet(
        "reek001",
        frompath=".",
        batch=[
            {
                "load_scalar": {
                    "localpath": "npv.txt"
                }
            },
            {
                "load_smry": {
                    "column_keys": "FOPT",
                    "time_index": "yearly"
                }
            },
            {
                "load_smry": {
                    "column_keys": "*",
                    "time_index": "daily"
                }
            },
        ],
    )
    assert len(ensset5.get_df("npv.txt")) == 10
    assert len(ensset5.get_df("unsmry--yearly")) == 50
    assert len(ensset5.get_df("unsmry--daily")) == 10980

    # Try batch processing after initialization:
    ensset6 = EnsembleSet("reek001", frompath=".")
    ensset6.process_batch(batch=[
        {
            "load_scalar": {
                "localpath": "npv.txt"
            }
        },
        {
            "load_smry": {
                "column_keys": "FOPT",
                "time_index": "yearly"
            }
        },
        {
            "load_smry": {
                "column_keys": "*",
                "time_index": "daily"
            }
        },
    ])
    # Bug fix: these assertions previously re-checked ensset5, leaving
    # the post-init batch result (ensset6) entirely unverified.
    assert len(ensset6.get_df("npv.txt")) == 10
    assert len(ensset6.get_df("unsmry--yearly")) == 50
    assert len(ensset6.get_df("unsmry--daily")) == 10980
from fmu.ensemble import EnsembleSet

# Gather selected CSV files from each realization and write them back
# to share/results, each one individually merged with parameters.txt.

ens = EnsembleSet(frompath="realization-*/iter-*")
csv_files = [
    "volumes/geogrid--oil.csv",
    "volumes/simgrid--oil.csv",
    "volumes/simulator_volume_fipnum.csv",
    "tables/rft.csv",
    "tables/unsmry--monthly.csv",
    "tables/equil.csv",
    "tables/relperm.csv",
    "tables/pvt.csv",
]

for relpath in csv_files:
    fullpath = "share/results/" + relpath
    ens.load_csv(fullpath)
    merged = ens.get_df(relpath.split("/")[1], merge="parameters.txt")
    merged.to_csv(fullpath, index=False)