Пример #1
0
    def test_kmeans_modelweights(self):
        """Zero-weighted models are not selected unless alone in their cluster."""
        ds = xr.open_dataset(self.nc_file)

        # Each case pairs a cluster count with the models whose weight is set
        # to zero. These are models that are selected in the n_clusters test,
        # so they should not be selected any more.
        cases = [
            (4, [4, 7, 10, 23]),
            (9, [0, 3, 4, 6, 7, 10, 11, 12, 13]),
        ]

        for n_clusters, zeroed in cases:
            model_weights = np.ones(ds.data.shape[0])
            model_weights[zeroed] = 0

            ids, cluster, fig_data = ensembles.kmeans_reduce_ensemble(
                data=ds.data,
                method={"n_clusters": n_clusters},
                random_state=42,
                make_graph=False,
                model_weights=model_weights,
            )

            zero_weighted = np.where(model_weights == 0)[0]
            for idx in zero_weighted:
                # A zero-weight model may only be picked when it is the sole
                # member of its cluster.
                has_company = np.sum(cluster == cluster[idx]) > 1
                if has_company:
                    assert idx not in ids
Пример #2
0
    def test_kmeans_variweights(self):
        """Check the selected members when variable weights are supplied."""
        pytest.importorskip("sklearn", minversion="0.24.1")
        ds = open_dataset(self.nc_file)
        options = {"random_state": 42, "make_graph": False}

        # Case 1: R² cutoff with reduced weights on some variables.
        reduced = np.ones(ds.data.shape[1])
        reduced[3:] = 0.25
        expected = [1, 3, 8, 10, 13, 14, 16, 19, 20]

        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_cutoff": 0.9},
            variable_weights=reduced,
            **options,
        )
        assert selected == expected
        assert len(selected) == 9

        # Case 2: R² optimisation with some variable weights set to zero.
        zeroed = np.ones(ds.data.shape[1])
        zeroed[[1, 4]] = 0

        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_optimize": None},
            variable_weights=zeroed,
            **options,
        )
        # Results here may change according to the sklearn version, hence the
        # membership check (*isin*) instead of a strict equality.
        assert all(np.isin([12, 13, 16], selected))
        assert len(selected) == 6
Пример #3
0
    def test_kmeans_sampleweights(self):
        """Check the selected members when sample weights are supplied."""
        ds = open_dataset(self.nc_file)

        # Case 1: R² cutoff with boosted weights for some simulations.
        boosted = np.ones(ds.data.shape[0])
        boosted[[0, 20]] = 15
        expected = [0, 20, 23]

        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_cutoff": 0.5},
            random_state=42,
            make_graph=False,
            sample_weights=boosted,
        )
        assert selected == expected
        assert len(selected) == 3

        # Case 2: R² optimisation with some weights set to zero.
        zeroed = np.ones(ds.data.shape[0])
        zeroed[[6, 18, 22]] = 0
        expected = [4, 5, 7, 10, 11, 12, 13]

        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_optimize": None},
            random_state=0,
            make_graph=False,
            sample_weights=zeroed,
        )
        assert selected == expected
        assert len(selected) == 7
Пример #4
0
    def test_kmeans_nclust(self):
        """Check the selected members for fixed numbers of clusters."""
        ds = xr.open_dataset(self.nc_file)
        options = {"random_state": 42, "make_graph": False}

        # Four clusters, data passed by keyword.
        expected = [4, 7, 10, 23]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data, method={"n_clusters": 4}, **options
        )
        assert selected == expected
        assert len(selected) == 4

        # Nine clusters, data passed positionally.
        expected = [0, 3, 4, 6, 7, 10, 11, 12, 13]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            ds.data, method={"n_clusters": 9}, **options
        )
        assert selected == expected
        assert len(selected) == 9
Пример #5
0
    def test_kmeans_variweights(self):
        """Check the selected members when variable weights are supplied."""
        pytest.importorskip("sklearn", minversion="0.22")
        ds = xr.open_dataset(self.nc_file)
        options = {"random_state": 42, "make_graph": False}

        # Reduced weights for some variables, used by the first two cases.
        reduced = np.ones(ds.data.shape[1])
        reduced[3:] = 0.25

        # Case 1: R² cutoff.
        expected = [1, 3, 8, 10, 13, 14, 16, 19, 20]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_cutoff": 0.9},
            variable_weights=reduced,
            **options,
        )
        assert selected == expected
        assert len(selected) == 9

        # Case 2: R² optimisation with the same reduced weights.
        expected = [2, 4, 8, 13, 14, 22]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_optimize": None},
            variable_weights=reduced,
            **options,
        )
        assert selected == expected
        assert len(selected) == 6

        # Case 3: R² optimisation with some variable weights set to zero.
        zeroed = np.ones(ds.data.shape[1])
        zeroed[[1, 4]] = 0
        expected = [4, 10, 12, 13, 16]

        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_optimize": None},
            variable_weights=zeroed,
            **options,
        )
        assert selected == expected
        assert len(selected) == 5
Пример #6
0
 def test_kmeans_rsqopt(self):
     """Check the selected members when using the R² optimisation method."""
     pytest.importorskip("sklearn", minversion="0.24.1")
     ds = open_dataset(self.nc_file)

     expected = [3, 4, 5, 7, 10, 11, 12, 13]
     options = {"random_state": 42, "make_graph": False}
     selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
         data=ds.data, method={"rsq_optimize": None}, **options
     )

     assert selected == expected
     assert len(selected) == 8
Пример #7
0
    def test_kmeans_rsqcutoff_with_graphs(self):
        """Check the R² cutoff selection with graph output enabled."""
        pytest.importorskip("sklearn", minversion="0.22")
        ds = xr.open_dataset(self.nc_file)

        # A fixed random state keeps the clustering consistent between runs.
        options = {"random_state": 42, "make_graph": True}
        cutoff = {"rsq_cutoff": 0.9}

        # Without a limit on the number of clusters.
        expected = [0, 1, 3, 4, 6, 7, 8, 9, 10, 11, 12, 15, 20, 22]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data, method=cutoff, **options
        )
        assert selected == expected
        assert len(selected) == 14

        # With the max_clusters option.
        expected = [0, 1, 3, 4, 6, 7, 10, 11, 18, 20]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_cutoff": 0.9},
            max_clusters=10,
            **options,
        )
        assert selected == expected
        assert len(selected) == 10
Пример #8
0
    def test_kmeans_rsqcutoff_with_graphs(self):
        """Check the R² cutoff selection with graph output enabled."""
        pytest.importorskip("sklearn", minversion="0.24.1")
        ds = open_dataset(self.nc_file)

        # A fixed random state keeps the clustering consistent between runs.
        options = {"random_state": 42, "make_graph": True}

        # Without a limit on the number of clusters.
        expected = [4, 7, 10, 23]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data, method={"rsq_cutoff": 0.5}, **options
        )
        assert selected == expected
        assert len(selected) == 4

        # With the max_clusters option.
        expected = [4, 7, 23]
        selected, _clusters, _fig_data = ensembles.kmeans_reduce_ensemble(
            data=ds.data,
            method={"rsq_cutoff": 0.5},
            max_clusters=3,
            **options,
        )
        assert selected == expected
        assert len(selected) == 3