Example #1
def test_summary_obs_runtime(monkeypatch, copy_snake_oil):
    """
    This is mostly a regression test, as reading SUMMARY_OBS was very slow when using
    SUMMARY_OBSERVATION and not HISTORY_OBSERVATION where multiple observations
    were pointing to the same response. To simulate that we load the same observations
    though individual points, and also in one go. To avoid this test being flaky the
    we assert on the difference in runtime. The difference in runtime we assert on is
    set to 10x though it should be around 2x
    """

    obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
    with obs_file.open(mode="a") as fin:
        fin.write(create_summary_observation())

    res_config = ResConfig("snake_oil.ert")
    ert = EnKFMain(res_config)

    facade = LibresFacade(ert)

    start_time = time.time()
    foprh = MeasuredData(facade,
                         [f"FOPR_{restart}" for restart in range(1, 201)])
    summary_obs_time = time.time() - start_time

    start_time = time.time()
    fopr = MeasuredData(facade, ["FOPR"])
    history_obs_time = time.time() - start_time

    assert (fopr.data.columns.get_level_values("data_index").values.tolist() ==
            foprh.data.columns.get_level_values("data_index").values.tolist())

    result = (
        foprh.get_simulated_data().values == fopr.get_simulated_data().values
    )
    assert np.logical_and.reduce(result).all()
    assert summary_obs_time < 10 * history_obs_time
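The helper create_summary_observation() used above is not part of this listing; a minimal sketch of what it might return, assuming ert's SUMMARY_OBSERVATION observation-file syntax (the key names, values and restart range are illustrative):

def create_summary_observation():
    # Illustrative sketch only: one SUMMARY_OBSERVATION block per restart,
    # all pointing at the same response key (FOPR), which is exactly the
    # situation the runtime test above exercises.
    observations = ""
    for restart in range(1, 201):
        observations += (
            f"\nSUMMARY_OBSERVATION FOPR_{restart}\n"
            "{\n"
            "    VALUE   = 0.1;\n"
            "    ERROR   = 0.05;\n"
            f"    RESTART = {restart};\n"
            "    KEY     = FOPR;\n"
            "};\n"
        )
    return observations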
Example #2
def _load_measured_record(facade, obs_keys):
    measured_data = MeasuredData(facade, obs_keys)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(facade.get_alpha())
    measured_data.filter_ensemble_std(facade.get_std_cutoff())
    return measured_data
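A possible call site for this helper, with facade and the observation keys obtained as in Example #3 (the surrounding variable names are illustrative, not taken from the original module):

# Hypothetical usage sketch for _load_measured_record().
obs_keys = [
    facade.get_observation_key(nr)
    for nr, _ in enumerate(facade.get_observations())
]
measured_data = _load_measured_record(facade, obs_keys)
# DataFrame of simulated responses, as asserted on in Example #1 and Example #11.
simulated = measured_data.get_simulated_data()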
Example #3
def _load_measured_record(enkf_main):
    facade = LibresFacade(enkf_main)
    obs_keys = [
        facade.get_observation_key(nr)
        for nr, _ in enumerate(facade.get_observations())
    ]
    return MeasuredData(facade, obs_keys)
Example #4
def test_history_obs(monkeypatch, facade_snake_oil):

    fopr = MeasuredData(facade_snake_oil, ["FOPR"])
    fopr.remove_inactive_observations()

    assert all(
        fopr.data.columns.get_level_values("data_index").values == list(
            range(199)))
Example #5
def test_summary_obs(monkeypatch, facade_snake_oil):
    summary_obs = MeasuredData(facade_snake_oil, ["WOPR_OP1_72"])
    summary_obs.remove_inactive_observations()
    assert all(
        summary_obs.data.columns.get_level_values("data_index").values == [71])
    # There is only one observation, so we check that the key_index is what we expect:
    assert summary_obs.data.columns.get_level_values(
        "key_index").values[0] == np.datetime64("2011-12-21")
Example #6
def test_no_storage_obs_only(monkeypatch, obs_key):
    shutil.rmtree("storage")
    res_config = ResConfig("snake_oil.ert")
    ert = EnKFMain(res_config)

    facade = LibresFacade(ert)
    md = MeasuredData(facade, [obs_key], load_data=False)
    assert set(md.data.columns.get_level_values(0)) == {obs_key}
Example #7
def test_all_measured_snapshot(snapshot, facade_snake_oil):
    """
    While there is no guarantee that this snapshot is 100% correct, it does represent
    the current state of loading from storage for the snake_oil case.
    """
    obs_keys = facade_snake_oil.get_matching_wildcards()("*").strings
    measured_data = MeasuredData(facade_snake_oil, obs_keys)
    snapshot.assert_match(measured_data.data.to_csv(),
                          "snake_oil_measured_output.csv")
Example #8
def _get_measured_data(
    facade, observation_keys, observation_index_list, alpha, std_cutoff
):
    measured_data = MeasuredData(facade, observation_keys, observation_index_list)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(alpha)
    measured_data.filter_ensemble_std(std_cutoff)
    return measured_data
Example #9
def test_gen_obs(monkeypatch, facade_snake_oil):
    df = MeasuredData(facade_snake_oil, ["WPR_DIFF_1"])
    df.remove_inactive_observations()

    assert all(
        df.data.columns.get_level_values("data_index").values ==
        [400, 800, 1200, 1800])
    assert all(
        df.data.columns.get_level_values("key_index").values ==
        [400, 800, 1200, 1800])
Example #10
def test_no_storage(monkeypatch, obs_key, expected_msg):
    shutil.rmtree("storage")
    res_config = ResConfig("snake_oil.ert")
    ert = EnKFMain(res_config)

    facade = LibresFacade(ert)
    with pytest.raises(
            loader.ResponseError,
            match=expected_msg,
    ):
        MeasuredData(facade, [obs_key])
Example #11
def test_get_simulated_data(input_dataframe, expected_result, monkeypatch,
                            facade, measured_data_setup):

    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])

    expected_result.columns = _set_multiindex(expected_result)

    result = md.get_simulated_data()
    assert result.equals(
        pd.concat({"test_key": expected_result.astype(float)}, axis=1))
Example #12
def test_remove_failed_realizations(input_dataframe, expected_result,
                                    monkeypatch, facade, measured_data_setup):
    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])

    md.remove_failed_realizations()

    expected_result.columns = _set_multiindex(expected_result)

    assert md.data.equals(
        pd.concat({"test_key": expected_result.astype(float)}, axis=1))
Example #13
def test_filter_ensemble_std(std_cutoff, expected_result, monkeypatch, facade,
                             measured_data_setup):
    expected_result.columns = _set_multiindex(expected_result)

    input_dataframe = pd.DataFrame(data=[[1, 1.5], [1, 2.5]], index=[1, 2])
    input_obs = pd.DataFrame(data=[[1, 2], [0.1, 0.2]], index=["OBS", "STD"])
    input_obs.columns = _set_multiindex(input_obs)
    measured_data_setup(input_dataframe, input_obs, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])

    md.filter_ensemble_std(std_cutoff)
    assert md.data.equals(pd.concat({"obs_key": expected_result}, axis=1))
Example #14
def _load_measured_record(enkf_main):
    facade = LibresFacade(enkf_main)
    obs_keys = [
        facade.get_observation_key(nr)
        for nr, _ in enumerate(facade.get_observations())
    ]
    measured_data = MeasuredData(facade, obs_keys)
    measured_data.remove_failed_realizations()
    measured_data.remove_inactive_observations()
    measured_data.filter_ensemble_mean_obs(facade.get_alpha())
    measured_data.filter_ensemble_std(facade.get_std_cutoff())
    return measured_data
Example #15
def test_filter_ens_mean_obs(alpha, expected_result, monkeypatch, facade,
                             measured_data_setup):
    expected_result.columns = _set_multiindex(expected_result)

    input_dataframe = pd.DataFrame(data=[[1, 2], [0.1, 0.2], [1.1, 1.6],
                                         [1, 2.5]],
                                   index=["OBS", "STD", 1, 2])

    measured_data_setup(input_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"])

    md.filter_ensemble_mean_obs(alpha)
    assert md.data.equals(pd.concat({"test_key": expected_result}, axis=1))
Example #16
def test_empty_dataset_from_remove_inactive_observations(
    input_header,
    measured_data,
    monkeypatch,
    facade,
    measured_data_setup,
):
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])

    with pytest.raises(ValueError, match="This operation results in an empty dataset"):
        md.remove_inactive_observations()
Example #17
def test_invalid_set_data(
    facade,
    monkeypatch,
    invalid_input,
    expected_error,
    valid_dataframe,
    measured_data_setup,
):

    measured_data_setup(valid_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"], index_lists=[[1, 2]])

    with pytest.raises(expected_error):
        md._set_data(invalid_input)
Example #18
def test_gen_obs_runtime(monkeypatch, copy_snake_oil, snapshot):
    obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
    with obs_file.open(mode="a") as fin:
        fin.write(create_general_observation())

    res_config = ResConfig("snake_oil.ert")
    ert = EnKFMain(res_config)

    facade = LibresFacade(ert)

    df = MeasuredData(facade,
                      [f"CUSTOM_DIFF_{restart}" for restart in range(500)])

    snapshot.assert_match(df.data.to_csv(), "snake_oil_gendata_output.csv")
Example #19
def _create_observation_transformation(ert, db_observations) -> List[dict]:
    observation_vectors = ert.get_observations()
    summary_obs_keys = observation_vectors.getTypedKeylist(
        EnkfObservationImplementationType.SUMMARY_OBS
    )
    active_obs = _extract_active_observations(ert)
    transformations: Dict = dict()
    keys = [ert.get_observation_key(i) for i, _ in enumerate(observation_vectors)]
    data = MeasuredData(ert, keys, load_data=False)
    observations = data.data.loc[["OBS", "STD"]]

    for obs_key, active_mask in active_obs.items():
        obs_data = _get_obs_data(obs_key, observations[obs_key])
        if obs_key in summary_obs_keys:
            obs_vec = observation_vectors[obs_key]
            data_key = obs_vec.getDataKey()
            if data_key in transformations:
                transformations[data_key]["x_axis"] += obs_data["x_axis"]
                transformations[data_key]["active"] += active_mask
                transformations[data_key]["scale"] += [1 for _ in active_mask]
            else:
                transformations[data_key] = dict(
                    name=data_key,
                    x_axis=obs_data["x_axis"],
                    scale=[1 for _ in active_mask],
                    active=active_mask,
                )
        else:
            # Scale is mocked to 1 for now
            transformations[obs_key] = dict(
                name=obs_key,
                x_axis=obs_data["x_axis"],
                scale=[1 for _ in active_mask],
                active=active_mask,
            )
    observation_ids = {obs["name"]: obs["id"] for obs in db_observations}
    # Sorting by x_axis matches the transformation with the observation, mostly needed for grouped summary obs
    for key, obs in transformations.items():
        x_axis, active, scale = (
            list(t)
            for t in zip(*sorted(zip(obs["x_axis"], obs["active"], obs["scale"])))
        )
        x_axis = _prepare_x_axis(x_axis)
        transformations[key]["x_axis"] = x_axis
        transformations[key]["active"] = active
        transformations[key]["scale"] = scale
        transformations[key]["observation_id"] = observation_ids[key]

    return [transformation for _, transformation in transformations.items()]
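A worked example of the sorting idiom used in the loop above: three parallel lists are zipped, sorted by x_axis, and unpacked back into lists (the values are illustrative):

x_axis = ["2010-03-01", "2010-01-01", "2010-02-01"]
active = [True, False, True]
scale = [1, 1, 1]

# Sort all three lists by x_axis while keeping their elements paired up.
x_axis, active, scale = (
    list(t) for t in zip(*sorted(zip(x_axis, active, scale)))
)
assert x_axis == ["2010-01-01", "2010-02-01", "2010-03-01"]
assert active == [False, True, True]
assert scale == [1, 1, 1]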
Example #20
def test_gen_obs_runtime(monkeypatch, copy_snake_oil):
    obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
    with obs_file.open(mode="a") as fin:
        fin.write(create_general_observation())

    res_config = ResConfig("snake_oil.ert")
    ert = EnKFMain(res_config)

    facade = LibresFacade(ert)

    df = MeasuredData(facade,
                      [f"CUSTOM_DIFF_{restart}" for restart in range(1, 500)])

    df.remove_inactive_observations()
    assert df.data.shape == (27, 1995)
Example #21
def test_gen_obs_and_summary_index_range(monkeypatch, facade_snake_oil):
    df = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "FOPR"], [[800], [10]])
    df.remove_inactive_observations()

    assert df.data.columns.get_level_values(0).to_list() == [
        "WPR_DIFF_1",
        "FOPR",
    ]
    assert df.data.columns.get_level_values("data_index").to_list() == [
        800,
        10,
    ]
    assert df.data.loc["OBS"].values == pytest.approx([0.1, 0.23281],
                                                      abs=0.00001)
    assert df.data.loc["STD"].values == pytest.approx([0.2, 0.1])
Example #22
def test_remove_failed_realizations(
    input_dataframe,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
    valid_obs_data,
):
    measured_data_setup(input_dataframe, valid_obs_data, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])

    md.remove_failed_realizations()

    expected_result.columns = _set_multiindex(expected_result)
    expected_result = pd.concat({"obs_key": expected_result}, axis=1)
    assert md.data.equals(expected_result)
Example #23
def _extract_and_dump_observations(rdb_api, blob_api):
    facade = ERT.enkf_facade

    observation_keys = [
        facade.get_observation_key(nr)
        for nr, _ in enumerate(facade.get_observations())
    ]

    measured_data = MeasuredData(facade, observation_keys)

    measured_data.remove_inactive_observations()
    observations = measured_data.data.loc[["OBS", "STD"]]

    _dump_observations(rdb_api=rdb_api,
                       blob_api=blob_api,
                       observations=observations)
Example #24
def test_remove_inactive_observations(
    input_header,
    measured_data,
    expected_result,
    monkeypatch,
    facade,
    measured_data_setup,
):
    input_header.columns = _set_multiindex(input_header)
    measured_data_setup(measured_data, input_header, monkeypatch)
    md = MeasuredData(facade, ["obs_key"])

    expected_result.columns = _set_multiindex(expected_result)
    expected_result = pd.concat({"obs_key": expected_result}, axis=1)

    md.remove_inactive_observations()
    assert md.data.equals(expected_result)
Example #25
def test_get_data(obs_type, monkeypatch, facade, valid_dataframe, measured_data_setup):

    facade.get_impl_type_name_for_obs_key.return_value = obs_type

    factory = measured_data_setup(valid_dataframe, monkeypatch)
    md = MeasuredData(facade, ["test_key"], index_lists=[[1, 2]])

    factory.assert_called_once_with(obs_type)
    mocked_loader = factory()
    mocked_loader.assert_called_once_with(facade, "test_key", "test_case", True)

    df = pd.DataFrame(
        data=[[2.0, 3.0], [5.0, 6.0]], index=["OBS", "STD"], columns=[1, 2]
    )
    df.columns = _set_multiindex(df)
    expected_result = pd.concat({"test_key": df}, axis=1)

    assert md._data.equals(expected_result)
Example #26
def test_gen_obs_and_summary(monkeypatch, facade_snake_oil):
    df = MeasuredData(facade_snake_oil, ["WPR_DIFF_1", "WOPR_OP1_9"])
    df.remove_inactive_observations()

    assert df.data.columns.get_level_values(0).to_list() == [
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WPR_DIFF_1",
        "WOPR_OP1_9",
    ]
    assert df.data.columns.get_level_values("data_index").to_list() == [
        400,
        800,
        1200,
        1800,
        8,
    ]
Example #27
def create_observations(ert) -> Tuple[List[js.ObservationCreate], Mapping[str, str]]:
    observation_vectors = ert.get_observations()
    keys = [ert.get_observation_key(i) for i, _ in enumerate(observation_vectors)]
    summary_obs_keys = observation_vectors.getTypedKeylist(
        EnkfObservationImplementationType.SUMMARY_OBS
    )

    def _get_obs_data(key, obs):
        return dict(
            name=key,
            x_axis=obs.columns.get_level_values(0).to_list(),
            values=obs.loc["OBS"].to_list(),
            errors=obs.loc["STD"].to_list(),
        )

    data = MeasuredData(ert, keys, load_data=False)

    observations = data.data.loc[["OBS", "STD"]]

    grouped_obs = {}

    response_observation_link = {}

    for obs_key in observations.columns.get_level_values(0).unique():
        obs_vec = observation_vectors[obs_key]
        data_key = obs_vec.getDataKey()
        obs_data = _get_obs_data(obs_key, observations[obs_key])

        if obs_key not in summary_obs_keys:
            grouped_obs[obs_key] = obs_data
            response_observation_link[data_key] = obs_key
        else:
            response_observation_link[data_key] = data_key
            if data_key in grouped_obs:
                for el in filter(lambda x: not x == "name", obs_data):
                    grouped_obs[data_key][el] += obs_data[el]
            else:
                obs_data["name"] = data_key
                grouped_obs[data_key] = obs_data

    return [
        js.ObservationCreate(**obs) for obs in grouped_obs.values()
    ], response_observation_link
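A minimal illustration of the grouping step above: two summary observations that share a data key get their x_axis/values/errors lists concatenated (the keys and numbers are made up for the example):

first = dict(name="FOPR", x_axis=["2010-01-10"], values=[0.1], errors=[0.05])
second = dict(name="FOPR_2", x_axis=["2010-02-10"], values=[0.2], errors=[0.05])

grouped_obs = {"FOPR": first}
# Merge every field except the name, exactly as in the loop above.
for el in filter(lambda x: x != "name", second):
    grouped_obs["FOPR"][el] += second[el]

assert grouped_obs["FOPR"]["x_axis"] == ["2010-01-10", "2010-02-10"]
assert grouped_obs["FOPR"]["values"] == [0.1, 0.2]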
Example #28
    def run(self, job_config):
        facade = LibresFacade(self.ert())
        user_config = load_yaml(job_config)
        user_config = _insert_default_group(user_config)

        obs = facade.get_observations()
        obs_keys = [facade.get_observation_key(nr) for nr, _ in enumerate(obs)]
        obs_with_data = keys_with_data(
            obs,
            obs_keys,
            facade.get_ensemble_size(),
            facade.get_current_fs(),
        )

        for config in user_config:
            job = ScalingJob(obs_keys, obs, obs_with_data, config)
            measured_data = MeasuredData(facade, job.get_calc_keys(),
                                         job.get_index_lists())
            job.scale(measured_data)
Example #29
    def run(self, *args):
        facade = LibresFacade(self.ert())

        obs_keys = [
            facade.get_observation_key(nr)
            for nr, _ in enumerate(facade.get_observations())
        ]
        measured_data = MeasuredData(facade, obs_keys)

        parser = spearman_job_parser()
        args = parser.parse_args(args)

        scaling_configs = spearman_job(measured_data, args.threshold)

        if not args.dry_run:
            try:
                CorrelatedObservationsScalingJob(
                    self.ert()).run(scaling_configs)
            except EmptyDatasetException:
                pass
Example #30
def test_block_obs(monkeypatch, tmpdir):
    """
    This test causes util_abort on some runs, so it will not be run by default
    as it is too flaky. I have chosen to leave it here as it could be useful when
    debugging. To run the test, run an ensemble_experiment on the snake_oil_field
    case to create a storage with BLOCK_OBS.
    """
    with tmpdir.as_cwd():
        test_data_dir = pathlib.Path(test_data_root) / "snake_oil_field"
        if not (test_data_dir / "storage").exists():
            pytest.skip()
        else:
            shutil.copytree(test_data_dir, "test_data")
            os.chdir("test_data")

            block_obs = """
            \nBLOCK_OBSERVATION RFT_2006
            {
               FIELD = PRESSURE;
               DATE  = 10/01/2010;
               SOURCE = SUMMARY;

               OBS P1 { I = 5;  J = 5;  K = 5;   VALUE = 100;  ERROR = 5; };
               OBS P2 { I = 1;  J = 3;  K = 8;   VALUE = 50;  ERROR = 2; };
            };
            """
            obs_file = pathlib.Path.cwd() / "observations" / "observations.txt"
            with obs_file.open(mode="a") as fin:
                fin.write(block_obs)

            res_config = ResConfig("snake_oil.ert")
            ert = EnKFMain(res_config)
            facade = LibresFacade(ert)

            df = MeasuredData(facade, ["RFT_2006"])
            df.remove_inactive_observations()
            assert all(
                df.data.columns.get_level_values("data_index").values ==
                [0, 1])
            assert all(
                df.data.columns.get_level_values("key_index").values == [0, 1])