Пример #1
0
def _load_measured_record(facade, obs_keys):
    """Build a ``MeasuredData`` for ``obs_keys`` and run the filter steps on it.

    The steps are applied in this order: ``remove_failed_realizations``,
    ``remove_inactive_observations``, ``filter_ensemble_mean_obs`` (with
    ``facade.get_alpha()``) and ``filter_ensemble_std`` (with
    ``facade.get_std_cutoff()``).

    Returns the filtered ``MeasuredData`` instance.
    """
    record = MeasuredData(facade, obs_keys)
    record.remove_failed_realizations()
    record.remove_inactive_observations()

    # The thresholds are read from the facade right before they are used.
    alpha = facade.get_alpha()
    record.filter_ensemble_mean_obs(alpha)

    std_cutoff = facade.get_std_cutoff()
    record.filter_ensemble_std(std_cutoff)

    return record
Пример #2
0
def test_summary_obs(monkeypatch, facade_snake_oil):
    """Check the column index built for the summary observation ``WOPR_OP1_72``.

    After ``remove_inactive_observations`` the frame is expected to hold
    exactly one column, with ``data_index`` 71 and ``key_index`` 2011-12-21.
    """
    summary_obs = MeasuredData(facade_snake_oil, ["WOPR_OP1_72"])
    summary_obs.remove_inactive_observations()

    columns = summary_obs.data.columns

    # Compare as a plain list rather than ``all(array == [71])``: the array
    # form broadcasts against the one-element list, so it also passes for an
    # empty index or for several columns that all equal 71. The list
    # comparison pins both the value and the number of columns.
    assert list(columns.get_level_values("data_index")) == [71]

    # Exactly one observation (checked above), so verify its key_index:
    key_index = columns.get_level_values("key_index").values
    assert key_index[0] == np.datetime64("2011-12-21")
Пример #3
0
def test_history_obs(monkeypatch, facade_snake_oil):
    """The ``FOPR`` observation is expected to have data indices 0..198."""
    history_obs = MeasuredData(facade_snake_oil, ["FOPR"])
    history_obs.remove_inactive_observations()

    data_index = history_obs.data.columns.get_level_values("data_index").values
    expected_index = list(range(199))
    assert all(data_index == expected_index)
Пример #4
0
def _get_measured_data(
    facade, observation_keys, observation_index_list, alpha, std_cutoff
):
    """Create a ``MeasuredData`` and run the filter steps on it.

    ``facade``, ``observation_keys`` and ``observation_index_list`` are passed
    straight to the ``MeasuredData`` constructor. ``alpha`` is handed to
    ``filter_ensemble_mean_obs`` and ``std_cutoff`` to ``filter_ensemble_std``,
    after failed realizations and inactive observations have been removed.

    Returns the filtered ``MeasuredData`` instance.
    """
    filtered = MeasuredData(facade, observation_keys, observation_index_list)

    # Removal steps first, then the two threshold-based filters.
    filtered.remove_failed_realizations()
    filtered.remove_inactive_observations()
    filtered.filter_ensemble_mean_obs(alpha)
    filtered.filter_ensemble_std(std_cutoff)

    return filtered
Пример #5
0
def test_gen_obs(monkeypatch, facade_snake_oil):
    """``WPR_DIFF_1`` should expose the same four values on both index levels."""
    gen_obs = MeasuredData(facade_snake_oil, ["WPR_DIFF_1"])
    gen_obs.remove_inactive_observations()

    expected = [400, 800, 1200, 1800]

    # data_index and key_index are both compared against the same list.
    for level in ("data_index", "key_index"):
        level_values = gen_obs.data.columns.get_level_values(level).values
        assert all(level_values == expected)
Пример #6
0
def test_remove_inactive_observations(
    input_dataframe,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """Compare the filtered data for ``test_key`` with ``expected_result``.

    ``expected_result`` gets its columns replaced by ``_set_multiindex``, is
    cast to float and nested under the ``test_key`` column level before the
    comparison.
    """
    measured_data_setup(input_dataframe, monkeypatch)
    measured = MeasuredData(facade, ["test_key"])

    expected_result.columns = _set_multiindex(expected_result)

    measured.remove_inactive_observations()

    expected_frame = pd.concat(
        {"test_key": expected_result.astype(float)},
        axis=1,
    )
    assert measured.data.equals(expected_frame)
Пример #7
0
def _load_measured_record(enkf_main):
    """Load and filter the measured data for every observation key.

    A ``LibresFacade`` is created around ``enkf_main``. One key is fetched
    with ``get_observation_key`` per entry of ``get_observations()``. The
    resulting ``MeasuredData`` then goes through
    ``remove_failed_realizations``, ``remove_inactive_observations``,
    ``filter_ensemble_mean_obs`` and ``filter_ensemble_std``, using the
    facade's alpha and std cutoff.

    Returns the filtered ``MeasuredData`` instance.
    """
    facade = LibresFacade(enkf_main)

    # Only the position of each observation is needed to look up its key.
    obs_keys = []
    for nr, _ in enumerate(facade.get_observations()):
        obs_keys.append(facade.get_observation_key(nr))

    record = MeasuredData(facade, obs_keys)
    record.remove_failed_realizations()
    record.remove_inactive_observations()

    alpha = facade.get_alpha()
    record.filter_ensemble_mean_obs(alpha)

    std_cutoff = facade.get_std_cutoff()
    record.filter_ensemble_std(std_cutoff)

    return record
Пример #8
0
def test_empty_dataset_from_remove_inactive_observations(
    input_header,
    measured_data,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """``remove_inactive_observations`` must raise when nothing would remain.

    The expected error is a ``ValueError`` whose message matches
    "This operation results in an empty dataset".
    """
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)

    obs_data = MeasuredData(facade, ["obs_key"])

    expected_message = "This operation results in an empty dataset"
    with pytest.raises(ValueError, match=expected_message):
        obs_data.remove_inactive_observations()
Пример #9
0
def test_gen_obs_runtime(monkeypatch, copy_snake_oil):
    """Load ``CUSTOM_DIFF_1`` .. ``CUSTOM_DIFF_499`` and check the data shape.

    The text returned by ``create_general_observation()`` is appended to
    ``observations/observations.txt`` in the current directory before the
    ``snake_oil.ert`` configuration is loaded.
    """
    observations_path = pathlib.Path.cwd() / "observations" / "observations.txt"
    with observations_path.open(mode="a") as obs_stream:
        obs_stream.write(create_general_observation())

    config = ResConfig("snake_oil.ert")
    enkf_main = EnKFMain(config)
    facade = LibresFacade(enkf_main)

    obs_keys = []
    for restart in range(1, 500):
        obs_keys.append(f"CUSTOM_DIFF_{restart}")

    measured = MeasuredData(facade, obs_keys)
    measured.remove_inactive_observations()

    assert measured.data.shape == (27, 1995)
Пример #10
0
def test_gen_obs_and_summary_index_range(monkeypatch, facade_snake_oil):
    """Load ``WPR_DIFF_1`` and ``FOPR`` with index lists ``[800]`` and ``[10]``.

    Checks the resulting column keys, the ``data_index`` level and the
    ``OBS`` / ``STD`` rows.
    """
    measured = MeasuredData(
        facade_snake_oil,
        ["WPR_DIFF_1", "FOPR"],
        [[800], [10]],
    )
    measured.remove_inactive_observations()

    key_level = measured.data.columns.get_level_values(0).to_list()
    assert key_level == ["WPR_DIFF_1", "FOPR"]

    data_index_level = measured.data.columns.get_level_values("data_index").to_list()
    assert data_index_level == [800, 10]

    obs_row = measured.data.loc["OBS"].values
    assert obs_row == pytest.approx([0.1, 0.23281], abs=0.00001)

    std_row = measured.data.loc["STD"].values
    assert std_row == pytest.approx([0.2, 0.1])
Пример #11
0
def _extract_and_dump_observations(rdb_api, blob_api):
    """Pass the ``OBS`` and ``STD`` rows of all observations to ``_dump_observations``.

    The facade is taken from ``ERT.enkf_facade``. One observation key is
    looked up per entry of ``facade.get_observations()``. Inactive
    observations are removed from the ``MeasuredData`` before the two rows
    are selected.

    ``rdb_api`` and ``blob_api`` are forwarded unchanged to
    ``_dump_observations``.
    """
    facade = ERT.enkf_facade

    # Only the position of each observation is needed to look up its key.
    observation_keys = []
    for nr, _ in enumerate(facade.get_observations()):
        observation_keys.append(facade.get_observation_key(nr))

    measured = MeasuredData(facade, observation_keys)
    measured.remove_inactive_observations()

    obs_and_std = measured.data.loc[["OBS", "STD"]]

    _dump_observations(
        rdb_api=rdb_api,
        blob_api=blob_api,
        observations=obs_and_std,
    )
Пример #12
0
def test_remove_inactive_observations(
    input_header,
    measured_data,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
):
    """Compare the filtered data for ``obs_key`` with ``expected_result``.

    Both ``input_header`` and ``expected_result`` get their columns replaced
    by ``_set_multiindex``. ``expected_result`` is then nested under the
    ``obs_key`` column level before the comparison.
    """
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    obs_data = MeasuredData(facade, ["obs_key"])

    expected_result.columns = _set_multiindex(expected_result)
    expected_frame = pd.concat({"obs_key": expected_result}, axis=1)

    obs_data.remove_inactive_observations()

    assert obs_data.data.equals(expected_frame)
Пример #13
0
def test_gen_obs_and_summary(monkeypatch, facade_snake_oil):
    """Load ``WPR_DIFF_1`` together with ``WOPR_OP1_9`` and check the columns.

    Four ``WPR_DIFF_1`` columns followed by one ``WOPR_OP1_9`` column are
    expected, with the matching ``data_index`` values.
    """
    measured = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "WOPR_OP1_9"])
    measured.remove_inactive_observations()

    key_level = measured.data.columns.get_level_values(0).to_list()
    assert key_level == [
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WOPR_OP1_9",
    ]

    data_index_level = measured.data.columns.get_level_values("data_index").to_list()
    assert data_index_level == [
        400,
        800,
        1200,
        1800,
        8,
    ]
Пример #14
0
def test_block_obs(monkeypatch, tmpdir):
    """
    Load a BLOCK_OBSERVATION through MeasuredData and check its column index.

    Not run by default: the test is flaky and causes util_abort on some runs.
    It is kept because it can be useful when debugging. To enable it, run an
    ensemble_experiment on the snake_oil_field case so that a storage with
    BLOCK_OBS exists; without that storage directory the test is skipped.
    """
    with tmpdir.as_cwd():
        source_dir = pathlib.Path(test_data_root) / "snake_oil_field"
        storage_dir = source_dir / "storage"

        # pytest.skip() raises, so nothing below runs without the storage.
        if not storage_dir.exists():
            pytest.skip()

        shutil.copytree(source_dir, "test_data")
        os.chdir("test_data")

        # The literal keeps its original indentation so the appended text
        # is unchanged.
        block_obs = """
            \nBLOCK_OBSERVATION RFT_2006
            {
               FIELD = PRESSURE;
               DATE  = 10/01/2010;
               SOURCE = SUMMARY;

               OBS P1 { I = 5;  J = 5;  K = 5;   VALUE = 100;  ERROR = 5; };
               OBS P2 { I = 1;  J = 3;  K = 8;   VALUE = 50;  ERROR = 2; };
            };
            """
        observations_path = pathlib.Path.cwd() / "observations" / "observations.txt"
        with observations_path.open(mode="a") as obs_stream:
            obs_stream.write(block_obs)

        config = ResConfig("snake_oil.ert")
        enkf_main = EnKFMain(config)
        facade = LibresFacade(enkf_main)

        measured = MeasuredData(facade, ["RFT_2006"])
        measured.remove_inactive_observations()

        data_index = measured.data.columns.get_level_values("data_index").values
        assert all(data_index == [0, 1])

        key_index = measured.data.columns.get_level_values("key_index").values
        assert all(key_index == [0, 1])
Пример #15
0
def _observation_scaling(facade, config):
    """
    Compute a scaling factor from the CALCULATE_KEYS observations and pass it
    to ``_update_scaling`` for the UPDATE_KEYS observations.

    The measured data for the calculate keys is filtered (failed
    realizations, inactive observations, ensemble mean with ``alpha``,
    ensemble std with ``std_cutoff``), wrapped in a ``DataMatrix`` and
    std-normalized in place before the factor is requested. No validation is
    done here.
    """
    calculate_keys = []
    for event in config.CALCULATE_KEYS.keys:
        calculate_keys.append(event.key)

    index_lists = []
    for event in config.CALCULATE_KEYS.keys:
        index_lists.append(event.index)

    measured = MeasuredData(facade, calculate_keys, index_lists)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()

    alpha = config.CALCULATE_KEYS.alpha
    measured.filter_ensemble_mean_obs(alpha)

    std_cutoff = config.CALCULATE_KEYS.std_cutoff
    measured.filter_ensemble_std(std_cutoff)

    normalized = DataMatrix(measured.data)
    normalized.std_normalization(inplace=True)

    scale_factor = normalized.get_scaling_factor(config.CALCULATE_KEYS)

    active_lists = _create_active_lists(
        facade.get_observations(),
        config.UPDATE_KEYS.keys,
    )

    _update_scaling(facade.get_observations(), scale_factor, active_lists)
Пример #16
0
def _spearman_correlation(facade, obs_keys, threshold, dry_run):
    """
    Cluster the observations from a correlation matrix of the simulated data.

    The measured data for ``obs_keys`` has failed realizations and inactive
    observations removed and is filtered with an ensemble std cutoff of
    1.0e-6. The clusters found with ``threshold`` are always passed to
    ``_output_clusters``. The job configurations built from them are passed
    to ``_run_scaling`` unless ``dry_run`` is true. No validation is done
    here.
    """
    measured = MeasuredData(facade, obs_keys)
    measured.remove_failed_realizations()
    measured.remove_inactive_observations()
    measured.filter_ensemble_std(1.0e-6)

    simulated = measured.get_simulated_data()
    correlation_matrix = _calculate_correlation_matrix(simulated)
    clusters = _cluster_analysis(correlation_matrix, threshold)

    # Pair every cluster entry with the first column level (the observation
    # key) and the data_index level of the matching correlation-matrix
    # column, so the observations can be grouped per cluster.
    column_index = correlation_matrix.columns
    key_level = column_index.get_level_values(0)
    data_index_level = column_index.get_level_values("data_index")
    labelled = list(zip(clusters, key_level, data_index_level))

    clustered_data = _cluster_data(labelled)
    job_configs = _config_creation(clustered_data)

    _output_clusters(clustered_data)

    if dry_run:
        return
    _run_scaling(facade, job_configs)