def test_summary2df_dates():
    """Test that we have some API possibilities with ISO dates"""
    eclfiles = EclFiles(DATAFILE)

    frame = summary.df(
        eclfiles,
        start_date=datetime.date(2002, 1, 2),
        end_date="2002-03-01",
        time_index="daily",
        datetime=True,
    )
    assert frame.index.name == "DATE"
    assert frame.index.dtype in ("datetime64[ns]", "datetime64")
    assert len(frame) == 59
    assert str(frame.index.values[0])[0:10] == "2002-01-02"
    assert frame.index.values[0] == np.datetime64("2002-01-02")
    assert frame.index.values[-1] == np.datetime64("2002-03-01")

    # "last" should yield exactly one row, at the final summary date:
    frame = summary.df(eclfiles, time_index="last", datetime=True)
    assert len(frame) == 1
    assert frame.index.values[0] == np.datetime64("2003-01-02")

    # Leave this test for the datetime=False behaviour:
    frame = summary.df(eclfiles, time_index="first")
    assert len(frame) == 1
    assert str(frame.index.values[0]) == "2000-01-01"
def test_df_column_keys():
    """Test that we can slice the dataframe on columns"""
    frame = summary.df(EclFiles(REEK), column_keys="FOPT")
    assert set(frame.columns) == {"FOPT"}
    assert set(frame.attrs["meta"].keys()) == {"FOPT"}

    # All vectors matching the FOP* glob in the REEK dataset:
    fop_cols = {
        "FOPRS",
        "FOPT",
        "FOPRH",
        "FOPTH",
        "FOPRF",
        "FOPR",
        "FOPTS",
        "FOPTF",
        "FOPP",
    }
    # Globbing works both as a bare string and wrapped in a list:
    for keys in ("FOP*", ["FOP*"]):
        frame = summary.df(EclFiles(REEK), column_keys=keys)
        assert set(frame.columns) == fop_cols
        assert set(frame.attrs["meta"].keys()) == fop_cols

    frame = summary.df(EclFiles(REEK), column_keys=["FOPR", "FOPT"])
    assert set(frame.columns) == {"FOPT", "FOPR"}
    assert set(frame.attrs["meta"].keys()) == {"FOPT", "FOPR"}

    # An unknown key gives no columns, but the index is retained:
    frame_no_columns = summary.df(EclFiles(REEK), column_keys=["BOGUS"])
    assert frame_no_columns.columns.empty
    assert all(frame_no_columns.index == frame.index)
def test_datenormalization():
    """Test normalization of dates, where dates can be
    ensured to be on dategrid boundaries"""
    start = datetime.date(1997, 11, 5)
    end = datetime.date(2020, 3, 2)
    assert normalize_dates(start, end, "monthly") == (
        datetime.date(1997, 11, 1),
        datetime.date(2020, 4, 1),
    )
    assert normalize_dates(start, end, "yearly") == (
        datetime.date(1997, 1, 1),
        datetime.date(2021, 1, 1),
    )

    # Check it does not touch already aligned dates
    aligned_monthly = (datetime.date(1997, 11, 1), datetime.date(2020, 4, 1))
    assert normalize_dates(aligned_monthly[0], aligned_monthly[1], "monthly") == (
        aligned_monthly
    )
    aligned_yearly = (datetime.date(1997, 1, 1), datetime.date(2021, 1, 1))
    assert normalize_dates(aligned_yearly[0], aligned_yearly[1], "yearly") == (
        aligned_yearly
    )

    # Check that we normalize correctly with get_smry():
    # realization-0 here has its last summary date at 2003-01-02
    eclfiles = EclFiles(DATAFILE)
    daily = summary.df(eclfiles, column_keys="FOPT", time_index="daily")
    assert str(daily.index[-1]) == "2003-01-02"
    monthly = summary.df(eclfiles, column_keys="FOPT", time_index="monthly")
    assert str(monthly.index[-1]) == "2003-02-01"
    yearly = summary.df(eclfiles, column_keys="FOPT", time_index="yearly")
    assert str(yearly.index[-1]) == "2004-01-01"
def test_foreseeable_future(tmpdir):
    """The foreseeable future in reservoir simulation is "defined" as 500 years.

    Check that we support summary files with this timespan"""
    tmpdir.chdir()
    src_dframe = pd.DataFrame(
        {
            "DATE": ["2000-01-01", "2500-01-01"],
            "FPR": [200, 180],
        }
    )
    eclsum = df2eclsum(src_dframe, casename="PLUGABANDON")

    dframe = summary.df(eclsum)
    assert (
        dframe.index
        == [
            dt(2000, 1, 1),
            # This discrepancy is due to seconds as a 32-bit float
            # having an accuracy limit (roundoff-error)
            # https://github.com/equinor/ecl/issues/803
            dt(2499, 12, 31, 23, 55, 44),
        ]
    ).all()

    # Try with time interpolation involved:
    dframe = summary.df(eclsum, time_index="yearly")
    assert len(dframe) == 501
    assert dframe.index.max() == datetime.date(year=2500, month=1, day=1)

    # Try with one-year timesteps:
    src_dframe = pd.DataFrame(
        {
            "DATE": pd.date_range("2000-01-01", "2069-01-01", freq="YS"),
            "FPR": range(70),
        }
    )
    eclsum = df2eclsum(src_dframe, casename="PLUGABANDON")
    dframe = summary.df(eclsum)
    # Still buggy:
    assert dframe.index[-1] == dt(2068, 12, 31, 23, 57, 52)

    # Try with one-year timesteps, starting late:
    src_dframe = pd.DataFrame(
        {
            "DATE": [datetime.date(2400 + year, 1, 1) for year in range(69)],
            "FPR": range(69),
        }
    )
    eclsum = df2eclsum(src_dframe, casename="PLUGABANDON")
    dframe = summary.df(eclsum)
    # Works fine when stepping only 68 years:
    assert dframe.index[-1] == dt(2468, 1, 1, 0, 0, 0)
def test_datenormalization():
    """Test normalization of dates, where dates can be
    ensured to be on dategrid boundaries"""
    # realization-0 here has its last summary date at 2003-01-02
    eclfiles = EclFiles(REEK)
    # The last index entry should be rolled forward to the enclosing
    # dategrid boundary for each supported frequency:
    for freq, expected_last in [
        ("daily", "2003-01-02"),
        ("monthly", "2003-02-01"),
        ("yearly", "2004-01-01"),
    ]:
        frame = summary.df(
            eclfiles, column_keys="FOPT", time_index=freq, datetime=True
        )
        assert str(frame.index[-1])[0:10] == expected_last
def test_summary2df():
    """Test that dataframes are produced"""
    eclfiles = EclFiles(DATAFILE)
    frame = summary.df(eclfiles)

    assert not frame.empty
    assert frame.index.name == "DATE"
    assert frame.index.dtype in ("datetime64[ns]", "datetime64")
    assert not frame.columns.empty
    assert "FOPT" in frame.columns

    # (datetime=True is superfluous when raw time reports are requested)
    frame = summary.df(eclfiles, datetime=True)
    assert frame.index.name == "DATE"
    assert frame.index.dtype in ("datetime64[ns]", "datetime64")
def test_df_column_keys():
    """Test that we can slice the dataframe on columns"""
    frame = summary.df(EclFiles(DATAFILE), column_keys="FOPT")
    assert set(frame.columns) == {"FOPT"}
    assert set(frame.attrs["meta"].keys()) == {"FOPT"}

    # All vectors matching the FOP* glob in this dataset:
    fop_cols = {
        "FOPRS",
        "FOPT",
        "FOPRH",
        "FOPTH",
        "FOPRF",
        "FOPR",
        "FOPTS",
        "FOPTF",
        "FOPP",
    }
    # Globbing works both as a bare string and wrapped in a list:
    for keys in ("FOP*", ["FOP*"]):
        frame = summary.df(EclFiles(DATAFILE), column_keys=keys)
        assert set(frame.columns) == fop_cols
        assert set(frame.attrs["meta"].keys()) == fop_cols

    frame = summary.df(EclFiles(DATAFILE), column_keys=["FOPR", "FOPT"])
    assert set(frame.columns) == {"FOPT", "FOPR"}
    assert set(frame.attrs["meta"].keys()) == {"FOPT", "FOPR"}

    # Unknown keys are a hard error in this code path:
    with pytest.raises(ValueError, match="No valid key"):
        summary.df(EclFiles(DATAFILE), column_keys=["BOGUS"])
def test_df():
    """Test that dataframes are produced"""
    eclfiles = EclFiles(DATAFILE)
    frame = summary.df(eclfiles)

    assert not frame.empty
    assert frame.index.name == "DATE"
    assert frame.index.dtype in ("datetime64[ns]", "datetime64")
    assert not frame.columns.empty
    assert "FOPT" in frame.columns

    # (datetime=True is implicit when raw time reports are requested)
    frame = summary.df(eclfiles, datetime=True)
    assert frame.index.name == "DATE"
    assert frame.index.dtype in ("datetime64[ns]", "datetime64")

    # Metadata should be attached using the attrs attribute on a Pandas
    # Dataframe (considered experimental by Pandas)
    assert "meta" in frame.attrs
    assert frame.attrs["meta"]["FOPR"]["unit"] == "SM3/DAY"
def test_df2eclsum_datetimeindex():
    """Test that providing a dataframe with a datetimeindex also works"""
    dframe = pd.DataFrame(
        [
            {"DATE": "2016-01-01", "FOPT": 1000, "FOPR": 100},
        ]
    )
    dframe["DATE"] = pd.to_datetime(dframe["DATE"])
    # BUG FIX: DataFrame.set_index() is not in-place; the original code
    # discarded its return value, so df2eclsum never actually received a
    # frame carrying a DatetimeIndex and the test title was a lie.
    dframe = dframe.set_index("DATE")

    roundtrip = df(df2eclsum(dframe))
    assert isinstance(roundtrip.index, pd.DatetimeIndex)
    assert roundtrip["FOPR"].values == [100]
    assert roundtrip["FOPT"].values == [1000]
def df(eclfiles: EclFiles) -> pd.DataFrame:
    """Extract connection status history for each compdat connection that
    is included in the summary data on the form CPI:WELL,I,J,K. CPI stands
    for connection productivity index.

    One line is added to the export every time a connection changes status.
    It is OPEN when CPI>0 and SHUT when CPI=0. The earliest date for any
    connection will be OPEN, i.e a cell can not be SHUT before it has been
    OPEN. This means that any cells that are always SHUT will be excluded.

    The output data set is very sparse compared to the CPI summary data.
    """
    # Pull only the CPI vectors from the summary data, then reduce them
    # to the sparse list of status changes:
    cpi_data = summary.df(eclfiles, column_keys="CPI*")
    return _extract_status_changes(cpi_data)
def test_ecl2df_errors(tmpdir):
    """Test error handling on bogus/corrupted summary files"""
    tmpdir.chdir()

    # Write random bytes where a summary file pair is expected:
    Path("FOO.UNSMRY").write_bytes(os.urandom(100))
    Path("FOO.SMSPEC").write_bytes(os.urandom(100))
    with pytest.raises(OSError, match="Failed to create summary instance"):
        # This is how libecl reacts to bogus binary data
        ecl.summary.EclSum("FOO.UNSMRY")

    # But EclFiles should be more tolerant, as it should be possible
    # to extract other data if SMRY is corrupted
    Path("FOO.DATA").write_text("RUNSPEC")
    assert str(EclFiles("FOO").get_ecldeck()).strip() == "RUNSPEC"
    with pytest.raises(OSError):
        EclFiles("FOO").get_eclsum()

    # Getting a dataframe from bogus data should give empty data:
    assert df(EclFiles("FOO")).empty
def test_df2eclsum(dframe):
    """Test that a dataframe can be converted to an EclSum object, and then
    read back again"""
    # Massage the dframe first so we can assert on equivalence after.
    dframe = _fix_dframe_for_libecl(dframe)

    eclsum = df2eclsum(dframe)
    if dframe.empty:
        # An empty dataframe cannot be represented as an EclSum object:
        assert eclsum is None
        return

    roundtripped = df(eclsum)
    pd.testing.assert_frame_equal(
        dframe.sort_index(axis=1),
        roundtripped.sort_index(axis=1),
        check_dtype=False,
    )
def test_extrapolation():
    """Summary data should be possible to extrapolate into
    the future, rates should be zero, cumulatives should be constant"""
    eclfiles = EclFiles(DATAFILE)
    lastfopt = summary.df(
        eclfiles, column_keys="FOPT", time_index="last", datetime=True
    )["FOPT"].values[0]
    answer = pd.DataFrame(
        # This is the maximal date for datetime64[ns]
        index=[np.datetime64("2262-04-11")],
        columns=["FOPT", "FOPR"],
        data=[[lastfopt, 0.0]],
    ).rename_axis("DATE")

    # Request the extrapolation date both as an ISO string and as a
    # list of datetime.date (NB: df() does not support datetime64
    # for time_index):
    for time_index in ("2262-04-11", [datetime.date(2262, 4, 11)]):
        pd.testing.assert_frame_equal(
            summary.df(
                eclfiles,
                column_keys=["FOPT", "FOPR"],
                time_index=time_index,
                datetime=True,
            ),
            answer,
        )

    # Pandas does not support DatetimeIndex beyond 2262:
    with pytest.raises(pd.errors.OutOfBoundsDatetime):
        summary.df(
            eclfiles,
            column_keys=["FOPT"],
            time_index=[datetime.date(2300, 1, 1)],
            datetime=True,
        )

    # But without datetime, we can get it extrapolated by libecl:
    assert summary.df(
        eclfiles, column_keys=["FOPT"], time_index=[datetime.date(2300, 1, 1)]
    )["FOPT"].values == [lastfopt]
def test_duplicated_summary_vectors(caplog):
    """EclSum files on disk may contain repeated vectors
    if the user has inserted a vector name twice in the
    SUMMARY section

    ecl2df.summary.df() should deduplicate this, and give a warning.
    """
    # ecl2df.df2eclsum() is not able to mock such a UNSMRY file.
    dupe_datafile = (
        TESTDIR
        / "data"
        / "eightcells"
        / "eightcells_duplicated_summary_vector"
        / "EIGHTCELLS_DUPES.DATA"
    )
    # Sanity-check that the deck really does repeat FOPR:
    assert "SUMMARY\nFOPR\nFOPR" in dupe_datafile.read_text()

    deduplicated_dframe = df(EclFiles(dupe_datafile))
    assert (deduplicated_dframe.columns == ["YEARS", "FOPR"]).all()
    assert "Duplicated columns detected" in caplog.text
def test_paramsupport(tmpdir):
    """Test that we can merge in parameters.txt

    This test code manipulates the paths in the checked out
    repository (as it involves some pointing upwards in the directory
    structure) It should not leave any extra files around,
    but requires certain filenames not to be under version control.
    """
    tmpcsvfile = tmpdir / "sum.csv"
    eclfiles = EclFiles(DATAFILE)

    # First try a key-value parameters.txt file:
    parameterstxt = Path(eclfiles.get_path()) / "parameters.txt"
    if parameterstxt.is_file():
        parameterstxt.unlink()
    parameterstxt.write_text("FOO 1\nBAR 3", encoding="utf-8")
    sys.argv = ["ecl2csv", "summary", DATAFILE, "-o", str(tmpcsvfile), "-p"]
    ecl2csv.main()
    disk_df = pd.read_csv(tmpcsvfile)
    assert "FOPT" in disk_df
    assert "FOO" in disk_df
    assert "BAR" in disk_df
    assert disk_df["BAR"].unique()[0] == 3
    parameterstxt.unlink()

    # Then the same parameters via a yaml file:
    parametersyml = Path(eclfiles.get_path()) / "parameters.yml"
    if parametersyml.is_file():
        parametersyml.unlink()
    parametersyml.write_text(yaml.dump({"FOO": 1, "BAR": 3}), encoding="utf-8")
    sys.argv = ["ecl2csv", "summary", DATAFILE, "-o", str(tmpcsvfile), "-p"]
    ecl2csv.main()
    disk_df = pd.read_csv(str(tmpcsvfile))
    assert "FOPT" in disk_df
    assert "FOO" in disk_df
    assert len(disk_df["FOO"].unique()) == 1
    assert disk_df["FOO"].unique()[0] == 1
    assert "BAR" in disk_df
    assert len(disk_df["BAR"].unique()) == 1
    assert disk_df["BAR"].unique()[0] == 3

    # Test the merging from summary.df() explicitly:
    assert "FOO" in summary.df(eclfiles, params=True, paramfile=None)
    assert "FOO" not in summary.df(eclfiles, params=False, paramfile=None)
    assert "FOO" not in summary.df(eclfiles, params=None, paramfile=None)
    assert "FOO" in summary.df(eclfiles, params=False, paramfile=parametersyml)
    assert "FOO" in summary.df(eclfiles, params=None, paramfile=parametersyml)
    assert "FOO" in summary.df(eclfiles, params=None, paramfile="parameters.yml")

    # Non-existing relative path is a soft error:
    assert "FOO" not in summary.df(
        eclfiles, params=None, paramfile="notexisting/parameters.yml"
    )

    # Non-existing absolute path is a hard error:
    with pytest.raises(FileNotFoundError):
        summary.df(
            eclfiles, params=None, paramfile="/tmp/notexisting/parameters.yml"
        )

    parametersyml.unlink()
def test_summary2df_dates(tmpdir):
    """Test that we have some API possibilities with ISO dates"""
    eclfiles = EclFiles(DATAFILE)

    frame = summary.df(
        eclfiles,
        start_date=datetime.date(2002, 1, 2),
        end_date="2002-03-01",
        time_index="daily",
    )
    assert frame.index.name == "DATE"
    # This is the default when daily index is requested:
    assert frame.index.dtype == "object"
    assert len(frame) == 59
    assert str(frame.index.values[0]) == "2002-01-02"
    assert str(frame.index.values[-1]) == "2002-03-01"

    frame = summary.df(eclfiles, time_index="last")
    assert len(frame) == 1
    assert str(frame.index.values[0]) == "2003-01-02"

    frame = summary.df(eclfiles, time_index="first")
    assert len(frame) == 1
    assert str(frame.index.values[0]) == "2000-01-01"

    # With datetime=True the index becomes a proper datetime index:
    frame = summary.df(
        eclfiles,
        start_date=datetime.date(2002, 1, 2),
        end_date="2002-03-01",
        time_index="daily",
        datetime=True,
    )
    assert frame.index.name == "DATE"
    assert frame.index.dtype in ("datetime64[ns]", "datetime64")

    # Exercise the command line client with date filtering:
    tmpcsvfile = tmpdir.join(".TMP-sum.csv")
    sys.argv = [
        "ecl2csv",
        "summary",
        "-v",
        DATAFILE,
        "-o",
        str(tmpcsvfile),
        "--start_date",
        "2002-01-02",
        "--end_date",
        "2003-01-02",
    ]
    ecl2csv.main()
    disk_df = pd.read_csv(tmpcsvfile)
    assert len(disk_df) == 97
    # Includes timestamps
    assert str(disk_df["DATE"].values[0]) == "2002-01-02 00:00:00"
    assert str(disk_df["DATE"].values[-1]) == "2003-01-02 00:00:00"

    tmpcsvfile = tmpdir.join(".TMP-sum.csv")
    sys.argv = [
        "ecl2csv",
        "summary",
        DATAFILE,
        "-o",
        str(tmpcsvfile),
        "--time_index",
        "daily",
        "--start_date",
        "2002-01-02",
        "--end_date",
        "2003-01-02",
    ]
    ecl2csv.main()
    disk_df = pd.read_csv(tmpcsvfile)
    assert len(disk_df) == 366
    assert str(disk_df["DATE"].values[0]) == "2002-01-02"
    assert str(disk_df["DATE"].values[-1]) == "2003-01-02"