Exemplo n.º 1
0
def test_assemble_config_not_existing_obs():
    """Asking for an unknown observation must raise ``ValidationError``.

    The user config requests ``not_an_observation``, which is not among the
    existing observation names handed to ``assemble_config``. The complete
    error text is compared verbatim against the expected message.
    """
    existing_observations = ["a", "list", "of", "existing", "observations"]
    user_config = {"observations": ["not_an_observation"]}

    with pytest.raises(ValidationError) as ve:
        assemble_config(user_config, existing_observations)

    actual_message = str(ve.value)
    assert actual_message == (
        "Invalid configuration of misfit preprocessor\n"
        "  - Found no match for observation not_an_observation (observations.0)\n"
    )
Exemplo n.º 2
0
def test_assemble_config(input_observations, expected_result):
    """Check which observations end up in the assembled config.

    ``input_observations`` is passed as the ``observations`` entry of the
    user config; the sorted ``config.observations`` must then equal
    ``expected_result``.

    NOTE(review): both arguments are presumably supplied by a parametrize
    decorator that is not visible here -- confirm.
    """
    existing_observations = ["a", "list", "of", "existing", "observations"]
    user_config = {"observations": input_observations}

    config = assemble_config(user_config, existing_observations)
    selected = sorted(config.observations)

    assert expected_result == selected
Exemplo n.º 3
0
def test_misfit_preprocessor_n_polynomials_w_correlation(num_polynomials):
    """Run the preprocessor on polynomials where one pair is correlated.

    Measurements are generated for ``num_polynomials`` polynomials with three
    states each, after which the ``poly_0`` column is overwritten with twice
    the ``poly_1`` column. Clustering uses ``spearman_correlation`` with a
    scaling threshold of 0.99. The run is expected to return exactly one
    more config than there are polynomials.
    """
    state_size = 3
    states_per_poly = [
        range(1, state_size + 1) for _ in range(num_polynomials)
    ]

    observations, simulated = generate_measurements(
        num_polynomials, poly_states=states_per_poly, ensemble_size=10000
    )
    measured_data = MockedMeasuredData(observations, simulated)

    # Introduce the correlation: poly_0 becomes a scaled copy of poly_1.
    measured_data.data["poly_0"] = measured_data.data["poly_1"] * 2.0

    user_config = {
        "clustering": {"method": "spearman_correlation"},
        "scaling": {"threshold": 0.99},
    }
    obs_keys = list(measured_data.data.columns.get_level_values(0))
    snapshot = assemble_config(user_config, obs_keys).snapshot

    reporter = Mock()
    configs = misfit_preprocessor.run(snapshot, measured_data, reporter)

    assert len(configs) - 1 == num_polynomials, configs
Exemplo n.º 4
0
def test_misfit_preprocessor_state_uneven_size(state_size):
    """Cluster polynomials whose numbers of states differ.

    ``state_size`` holds one state count per polynomial. Clustering uses
    ``spearman_correlation`` with the ``fcluster`` options
    ``criterion="maxclust"`` and ``t`` set to the polynomial count plus one.
    The run must return one config per polynomial, and the result must pass
    ``assert_homogen_clusters``.
    """
    num_polynomials = len(state_size)
    states_per_poly = [range(1, n_states + 1) for n_states in state_size]

    observations, simulated = generate_measurements(
        num_polynomials, poly_states=states_per_poly, ensemble_size=30000
    )
    measured_data = MockedMeasuredData(observations, simulated)
    obs_keys = measured_data.data.columns.get_level_values(0)

    fcluster_options = {"t": num_polynomials + 1, "criterion": "maxclust"}
    user_config = {
        "clustering": {
            "method": "spearman_correlation",
            "spearman_correlation": {"fcluster": fcluster_options},
        }
    }
    snapshot = assemble_config(user_config, obs_keys).snapshot

    reporter = Mock()
    configs = misfit_preprocessor.run(snapshot, measured_data, reporter)

    assert len(configs) == num_polynomials, configs
    assert_homogen_clusters(configs)
Exemplo n.º 5
0
def test_misfit_preprocessor_n_polynomials(num_polynomials, method):
    """
    Build a data set of uncorrelated polynomials and cluster it with the
    given ``method``; every input polynomial should then end up in a cluster
    of its own, so the number of returned configs equals ``num_polynomials``.
    """
    state_size = 3
    states_per_poly = [
        range(1, state_size + 1) for _ in range(num_polynomials)
    ]

    observations, simulated = generate_measurements(
        num_polynomials, poly_states=states_per_poly, ensemble_size=10000
    )
    measured_data = MockedMeasuredData(observations, simulated)
    obs_keys = measured_data.data.columns.get_level_values(0)

    # The PCA threshold is 0.99, so only a high degree of correlation has any
    # effect. The setting only matters for the "auto_scale" method.
    user_config = {
        "clustering": {"method": method},
        "scaling": {"threshold": 0.99},
    }
    snapshot = assemble_config(user_config, obs_keys).snapshot

    reporter = Mock()
    configs = misfit_preprocessor.run(snapshot, measured_data, reporter)

    assert_homogen_clusters(configs)
    assert len(configs) == num_polynomials, configs
Exemplo n.º 6
0
def test_assemble_config_default_observations():
    """An empty user config must select every existing observation.

    ``assemble_config`` is called with ``{}``; the ``observations`` of the
    resulting snapshot are compared, after sorting, with the full list of
    existing observation names.
    """
    existing_observations = ["a", "list", "of", "existing", "observations"]
    # Computed up front so the expectation does not depend on the call below.
    expected = sorted(existing_observations)

    config = assemble_config({}, existing_observations).snapshot

    assert expected == sorted(config.observations)
Exemplo n.º 7
0
def _fetch_scaling_parameters(config_record, observations):
    """Extract the scaling parameters from a misfit preprocessor config.

    Args:
        config_record: Raw configuration handed to
            ``misfit_preprocessor.assemble_config``.
        observations: Observations handed to
            ``misfit_preprocessor.assemble_config`` alongside the config.

    Returns:
        A dict with the single key ``"threshold"``, taken from the
        ``scaling`` section of the config snapshot.

    Raises:
        ValueError: If the assembled config reports itself as not valid.
    """
    config = misfit_preprocessor.assemble_config(config_record, observations)

    if config.valid:
        scaling = config.snapshot.scaling
        return {"threshold": scaling.threshold}

    # misfit_preprocessor.run has already loaded this config once, so a
    # failure on this second load should never happen.
    raise ValueError("Misfit preprocessor config not valid on second load")
Exemplo n.º 8
0
def test_misfit_preprocessor_configuration_errors():
    """Unknown keys in the user config must be reported in the error text.

    The config carries an unknown key at the root level (``unknown_key``)
    and another inside ``clustering.spearman_correlation.fcluster``
    (``threshold``). Assembling it must raise
    ``misfit_preprocessor.ValidationError`` with a message listing both.
    """
    existing_observations = ["a", "list", "of", "observations"]
    user_config = {
        "unknown_key": ["not in set"],
        "clustering": {
            "method": "spearman_correlation",
            "spearman_correlation": {"fcluster": {"threshold": 1.0}},
        },
    }

    with pytest.raises(misfit_preprocessor.ValidationError) as ve:
        assemble_config(user_config, existing_observations)

    actual_message = str(ve.value)
    assert actual_message == (
        "Invalid configuration of misfit preprocessor\n"
        "  - Unknown key: unknown_key (root level)\n"
        "  - Unknown key: threshold (clustering.spearman_correlation.fcluster)\n"
    )
Exemplo n.º 9
0
def test_misfit_preprocessor_state_size(state_size, method, linkage):
    """Cluster five polynomials with the given method and linkage.

    ``state_size`` holds the state count of each polynomial. ``method`` is
    the clustering method, and is also the config key under which the
    ``linkage`` method is nested. For the state sizes ``[5, 5, 5, 5, 100]``
    the test is skipped when ``linkage`` is ``"average"`` or, failing that,
    when ``method`` is ``"auto_scale"``. Otherwise the result must pass
    ``assert_homogen_clusters`` and contain one config per polynomial.
    """
    has_large_outlier = state_size == [5, 5, 5, 5, 100]
    if has_large_outlier and linkage == "average":
        pytest.skip("Produces wrong number of clusters")
    if has_large_outlier and method == "auto_scale":
        pytest.skip("Produces not homogeneous clusters due to PCA analysis")

    num_polynomials = 5
    states_per_poly = [range(1, n_states + 1) for n_states in state_size]

    observations, simulated = generate_measurements(
        num_polynomials, poly_states=states_per_poly, ensemble_size=30000
    )
    measured_data = MockedMeasuredData(observations, simulated)
    obs_keys = measured_data.data.columns.get_level_values(0)

    # The method name is also the key holding the method-specific options.
    clustering = {"method": method}
    clustering[method] = {"linkage": {"method": linkage}}
    user_config = {
        "clustering": clustering,
        "scaling": {"threshold": 0.99},
    }
    snapshot = assemble_config(user_config, obs_keys).snapshot

    reporter = Mock()
    configs = misfit_preprocessor.run(snapshot, measured_data, reporter)

    assert_homogen_clusters(configs)
    assert len(configs) == num_polynomials, configs