def test_misfit_preprocessor_configuration_errors():
    """Check that unknown configuration keys are rejected with a full report.

    The configuration contains one unknown key at the root level and one
    unknown key nested under the spearman ``fcluster`` section. The test
    expects a ``ValidationError`` whose message lists both of them.
    """
    obs, sim = generate_measurements(1)
    data = MockedMeasuredData(obs, sim)

    fcluster_section = {"threshold": 1.0}
    invalid_config = {
        "unknown_key": [],
        "clustering": {
            "method": "spearman_correlation",
            "spearman_correlation": {"fcluster": fcluster_section},
        },
    }

    # The message is compared verbatim, including the trailing newline.
    expected_err_msg = (
        "Invalid configuration of misfit preprocessor\n"
        "  - Unknown key: unknown_key (root level)\n"
        "  - Unknown key: threshold (clustering.spearman_correlation.fcluster)\n"
    )

    reporter = Mock()
    with pytest.raises(misfit_preprocessor.ValidationError) as ve:
        misfit_preprocessor.run(invalid_config, data, reporter)

    assert expected_err_msg == str(ve.value)
def run(self, *args):
    """Run the misfit preprocessor and hand its result to the COS job.

    The configuration record is fetched from ``args``, assembled together
    with the observations obtained through a ``LibresFacade`` and turned
    into a snapshot. The snapshot drives both the loading of measured data
    and ``misfit_preprocessor.run``. Every resulting scaling configuration
    has its ``CALCULATE_KEYS`` entry updated with the scaling parameters
    before ``CorrelatedObservationsScalingJob`` is run on all of them.
    """
    facade = LibresFacade(self.ert())
    raw_config = _fetch_config_record(args)
    observations = _get_observations(facade)

    snapshot = assemble_config(raw_config, observations).snapshot
    measured = _load_measured_record(facade, snapshot.observations)

    scaling_configs = misfit_preprocessor.run(
        config=snapshot,
        measured_data=measured,
        reporter=self.reporter,
    )

    # TODO: move the COS execution into misfit_preprocessor.run once COS no
    # longer depends on self.ert to run.
    scaling_params = _fetch_scaling_parameters(raw_config, observations)
    for entry in scaling_configs:
        entry["CALCULATE_KEYS"].update(scaling_params)

    try:
        CorrelatedObservationsScalingJob(self.ert()).run(scaling_configs)
    except EmptyDatasetException:
        # NOTE(review): an empty dataset is silently ignored here;
        # presumably deliberate best-effort behaviour -- confirm.
        pass
def test_misfit_preprocessor_n_polynomials_w_correlation(num_polynomials):
    """Check that two correlated polynomials collapse into one cluster.

    ``num_polynomials`` polynomials are generated, after which ``poly_0`` is
    overwritten with a scaled copy of ``poly_1``. Those two are then expected
    to share a cluster, so the number of scaling configurations should be one
    less than the number of polynomials.

    Requires ``num_polynomials >= 2`` since both ``poly_0`` and ``poly_1``
    are accessed.
    """
    state_size = 3
    poly_states = [range(1, state_size + 1) for _ in range(num_polynomials)]

    observations, simulated = generate_measurements(
        num_polynomials,
        poly_states=poly_states,
        ensemble_size=10000,
    )
    measured_data = MockedMeasuredData(observations, simulated)

    # We add a correlation:
    measured_data.data["poly_0"] = measured_data.data["poly_1"] * 2.0

    config = {
        "clustering": {"method": "spearman_correlation"},
        "scaling": {"threshold": 0.99},
    }
    reporter_mock = Mock()
    configs = misfit_preprocessor.run(config, measured_data, reporter_mock)

    # poly_0 and poly_1 merge, hence one configuration fewer than polynomials.
    # (The previous check, ``len(configs) - 1``, demanded one *more*.)
    assert num_polynomials == len(configs) + 1, configs
def test_misfit_preprocessor_state_uneven_size(state_size):
    """Cluster polynomials whose numbers of states differ.

    ``state_size`` holds one state count per polynomial. The test expects
    one scaling configuration per polynomial and that the resulting clusters
    pass ``assert_homogen_clusters``.
    """
    num_polynomials = len(state_size)
    poly_states = [range(1, n_states + 1) for n_states in state_size]

    obs, sim = generate_measurements(
        num_polynomials,
        poly_states=poly_states,
        ensemble_size=30000,
    )
    data = MockedMeasuredData(obs, sim)

    fcluster_options = {"t": num_polynomials + 1, "criterion": "maxclust"}
    config = {
        "clustering": {
            "method": "spearman_correlation",
            "spearman_correlation": {"fcluster": fcluster_options},
        }
    }

    configs = misfit_preprocessor.run(config, data, Mock())

    assert num_polynomials == len(configs), configs
    assert_homogen_clusters(configs)
def test_misfit_preprocessor_state_size(state_size, method, linkage):
    """Cluster five polynomials for a given clustering method and linkage.

    Two combinations involving the ``[5, 5, 5, 5, 100]`` state sizes are
    skipped, with the reason given in the skip message. For all others the
    clusters must pass ``assert_homogen_clusters`` and there must be one
    scaling configuration per polynomial.
    """
    # pytest.skip raises, so these guards are mutually exclusive in effect.
    lopsided_states = state_size == [5, 5, 5, 5, 100]
    if lopsided_states and linkage == "average":
        pytest.skip("Produces wrong number of clusters")
    if lopsided_states and method == "auto_scale":
        pytest.skip("Produces not homogeneous clusters due to PCA analysis")

    num_polynomials = 5
    poly_states = [range(1, n_states + 1) for n_states in state_size]

    obs, sim = generate_measurements(
        num_polynomials,
        poly_states=poly_states,
        ensemble_size=30000,
    )
    data = MockedMeasuredData(obs, sim)

    # The method-specific section is keyed by the method name itself.
    clustering = {"method": method}
    clustering[method] = {"linkage": {"method": linkage}}
    config = {"clustering": clustering, "scaling": {"threshold": 0.99}}

    configs = misfit_preprocessor.run(config, data, Mock())

    assert_homogen_clusters(configs)
    assert num_polynomials == len(configs), configs
def test_misfit_preprocessor_n_polynomials(num_polynomials, method):
    """Expect one cluster per polynomial for uncorrelated input.

    The generated data set consists of uncorrelated polynomials, so the
    number of scaling configurations should equal the number of input
    polynomials, and every cluster should pass ``assert_homogen_clusters``.
    """
    state_size = 3
    states = range(1, state_size + 1)
    poly_states = [states for _ in range(num_polynomials)]

    obs, sim = generate_measurements(
        num_polynomials,
        poly_states=poly_states,
        ensemble_size=10000,
    )
    data = MockedMeasuredData(obs, sim)

    # A PCA threshold of 0.99 means a high degree of correlation is required
    # to have an impact. This setting only matters for "auto_scale".
    user_config = {
        "clustering": {"method": method},
        "scaling": {"threshold": 0.99},
    }
    obs_keys = data.data.columns.get_level_values(0)
    config = assemble_config(user_config, obs_keys)

    configs = misfit_preprocessor.run(config.snapshot, data, Mock())

    assert_homogen_clusters(configs)
    assert num_polynomials == len(configs), configs
def run(self, *args):
    """Run the misfit preprocessor and hand its result to the COS job.

    The configuration record is fetched from ``args`` and the measured data
    is loaded via ``self.ert()``. Both are passed to
    ``misfit_preprocessor.run``. Every resulting scaling configuration has
    its ``CALCULATE_KEYS`` entry updated with the scaling parameters before
    ``CorrelatedObservationsScalingJob`` is run on all of them.
    """
    raw_config = _fetch_config_record(args)
    measured = _load_measured_record(self.ert())

    scaling_configs = misfit_preprocessor.run(
        misfit_preprocessor_config=raw_config,
        measured_data=measured,
        reporter=self.reporter,
    )

    # TODO: move the COS execution into misfit_preprocessor.run once COS no
    # longer depends on self.ert to run.
    scaling_params = _fetch_scaling_parameters(raw_config, measured)
    for entry in scaling_configs:
        entry["CALCULATE_KEYS"].update(scaling_params)

    try:
        CorrelatedObservationsScalingJob(self.ert()).run(scaling_configs)
    except EmptyDatasetException:
        # NOTE(review): an empty dataset is silently ignored here;
        # presumably deliberate best-effort behaviour -- confirm.
        pass