Exemplo n.º 1
0
def test_make_checkerboard():
    """Check shapes, finiteness and seed reproducibility of make_checkerboard."""
    side = (100, 100)

    # 20 x 5 cluster grid: data and both indicator arrays must be 100 x 100.
    X, rows, cols = make_checkerboard(
        shape=side, n_clusters=(20, 5), shuffle=True, random_state=0)
    assert X.shape == side, "X shape mismatch"
    assert rows.shape == side, "rows shape mismatch"
    assert cols.shape == side, "columns shape mismatch"

    # Scalar cluster count: every returned array has to be finite.
    common = dict(shape=side, n_clusters=2, shuffle=True, random_state=0)
    X, rows, cols = make_checkerboard(**common)
    for returned in (X, rows, cols):
        assert_all_finite(returned)

    # Two calls with the same seed must produce matching data.
    X1, _, _ = make_checkerboard(**common)
    X2, _, _ = make_checkerboard(**common)
    assert_array_almost_equal(X1, X2)
Exemplo n.º 2
0
def test_perfect_checkerboard():
    """Noise-free checkerboards of several shapes must score a consensus of 1."""
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    # Square, tall and wide inputs are fitted with the same estimator in turn.
    for board_shape in ((30, 30), (40, 30), (30, 40)):
        S, rows, cols = make_checkerboard(board_shape, 3, noise=0,
                                          random_state=0)
        model.fit(S)
        found = consensus_score(model.biclusters_, (rows, cols))
        assert_equal(found, 1)
Exemplo n.º 3
0
def test_make_checkerboard():
    """Check shapes, finiteness and seed determinism of make_checkerboard."""
    expected_shape = (100, 100)

    # A 20 x 5 cluster grid must yield 100 x 100 data and indicator arrays.
    X, rows, cols = make_checkerboard(
        shape=expected_shape,
        n_clusters=(20, 5),
        shuffle=True,
        random_state=0,
    )
    assert_equal(X.shape, expected_shape, "X shape mismatch")
    assert_equal(rows.shape, expected_shape, "rows shape mismatch")
    assert_equal(cols.shape, expected_shape, "columns shape mismatch")

    # With a scalar cluster count every returned array must be finite.
    two_cluster_args = dict(
        shape=expected_shape, n_clusters=2, shuffle=True, random_state=0)
    X, rows, cols = make_checkerboard(**two_cluster_args)
    for returned in (X, rows, cols):
        assert_all_finite(returned)

    # The same seed must reproduce exactly the same data matrix.
    X1, _, _ = make_checkerboard(**two_cluster_args)
    X2, _, _ = make_checkerboard(**two_cluster_args)
    assert_array_equal(X1, X2)
Exemplo n.º 4
0
def test_perfect_checkerboard():
    """Noise-free checkerboards of several shapes must score a consensus of 1."""
    # XXX Previously failed on build bot (not reproducible)
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    # The same estimator is refitted on square, tall and wide inputs.
    for board_shape in ((30, 30), (40, 30), (30, 40)):
        S, rows, cols = make_checkerboard(board_shape, 3, noise=0,
                                          random_state=0)
        model.fit(S)
        truth = (rows, cols)
        assert consensus_score(model.biclusters_, truth) == 1
Exemplo n.º 5
0
def test_spectral_biclustering():
    """Run SpectralBiclustering over a parameter grid on a noisy checkerboard.

    Every grid combination is fitted on both a dense and a CSR copy of the
    data; the sparse + 'log' combination is expected to raise ValueError.
    """
    random_state = 0
    param_grid = {
        'method': ['scale', 'bistochastic', 'log'],
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [10],
        'n_jobs': [1],
    }
    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                      random_state=random_state)
    truth = (rows, cols)

    for mat in (S, csr_matrix(S)):
        for combo in ParameterGrid(param_grid):
            model = SpectralBiclustering(n_clusters=3,
                                         random_state=random_state,
                                         **combo)

            if issparse(mat) and combo['method'] == 'log':
                # cannot take log of sparse matrix
                assert_raises(ValueError, model.fit, mat)
                continue

            model.fit(mat)

            # 3 x 3 clusters give 9 biclusters over 30 rows / 30 columns,
            # and each row and column must belong to exactly 3 of them.
            assert_equal(model.rows_.shape, (9, 30))
            assert_equal(model.columns_.shape, (9, 30))
            row_membership = model.rows_.sum(axis=0)
            assert_array_equal(row_membership, np.repeat(3, 30))
            column_membership = model.columns_.sum(axis=0)
            assert_array_equal(column_membership, np.repeat(3, 30))
            assert_equal(consensus_score(model.biclusters_, truth), 1)
Exemplo n.º 6
0
def test_spectral_biclustering():
    """Run SpectralBiclustering over a parameter grid on a noisy checkerboard.

    Each grid combination is fitted on a dense and a CSR version of the data;
    fitting sparse input with method 'log' is expected to raise ValueError.
    """
    random_state = 0
    param_grid = {
        'method': ['scale', 'bistochastic', 'log'],
        'svd_method': ['randomized', 'arpack'],
        'n_svd_vecs': [None, 20],
        'mini_batch': [False, True],
        'init': ['k-means++'],
        'n_init': [3],
        'n_jobs': [1],
    }
    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                      random_state=random_state)
    reference = (rows, cols)

    for mat in (S, csr_matrix(S)):
        for settings in ParameterGrid(param_grid):
            model = SpectralBiclustering(n_clusters=3,
                                         random_state=random_state,
                                         **settings)

            if issparse(mat) and settings['method'] == 'log':
                # cannot take log of sparse matrix
                assert_raises(ValueError, model.fit, mat)
                continue

            model.fit(mat)

            # 9 biclusters are expected, with every row and column
            # assigned to exactly 3 of them.
            assert_equal(model.rows_.shape, (9, 30))
            assert_equal(model.columns_.shape, (9, 30))
            rows_per_sample = model.rows_.sum(axis=0)
            assert_array_equal(rows_per_sample, np.repeat(3, 30))
            columns_per_feature = model.columns_.sum(axis=0)
            assert_array_equal(columns_per_feature, np.repeat(3, 30))
            assert_equal(consensus_score(model.biclusters_, reference), 1)
def test_perfect_checkerboard():
    """Skipped test: perfect checkerboards should score a consensus of 1.

    The SkipTest raised on entry disables everything below it; the remaining
    body is kept so the test can be re-enabled by deleting the raise.
    """
    raise SkipTest("This test is failing on the buildbot, but cannot"
                   " reproduce. Temporarily disabling it until it can be"
                   " reproduced and  fixed.")
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    # Square, tall and wide inputs are fitted with the same estimator.
    for board_shape in ((30, 30), (40, 30), (30, 40)):
        S, rows, cols = make_checkerboard(board_shape, 3, noise=0,
                                          random_state=0)
        model.fit(S)
        found = consensus_score(model.biclusters_, (rows, cols))
        assert_equal(found, 1)
Exemplo n.º 8
0
def test_perfect_checkerboard():
    """Noise-free checkerboards of several shapes must score a consensus of 1."""
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    # One estimator is refitted on square, tall and wide inputs in turn.
    board_shapes = ((30, 30), (40, 30), (30, 40))
    for board_shape in board_shapes:
        S, rows, cols = make_checkerboard(board_shape, 3, noise=0,
                                          random_state=0)
        model.fit(S)
        truth = (rows, cols)
        assert_equal(consensus_score(model.biclusters_, truth), 1)
Exemplo n.º 9
0
    def test_Bicluster(self, algo):
        """Compare the ModelFrame bicluster accessor against the plain estimator.

        ``algo`` names the estimator class looked up on both
        ``df.cluster.bicluster`` and ``cluster.bicluster``.
        """
        data, rows, columns = datasets.make_checkerboard(
            shape=(300, 300),
            n_clusters=5,
            noise=10,
            shuffle=True,
            random_state=self.random_state,
        )
        df = pdml.ModelFrame(data)

        # Same class, built once through the frame accessor and once directly.
        frame_factory = getattr(df.cluster.bicluster, algo)
        frame_model = frame_factory(3, random_state=self.random_state)
        plain_factory = getattr(cluster.bicluster, algo)
        plain_model = plain_factory(3, random_state=self.random_state)

        df.fit(frame_model)
        plain_model.fit(data)

        self.assert_numpy_array_almost_equal(frame_model.biclusters_,
                                             plain_model.biclusters_)
Exemplo n.º 10
0
def test_perfect_checkerboard():
    """Skipped test: perfect checkerboards should score a consensus of 1.

    The SkipTest raised on entry disables the rest of the body, which is
    retained so the test can be re-enabled by removing the raise.
    """
    raise SkipTest("This test is failing on the buildbot, but cannot"
                   " reproduce. Temporarily disabling it until it can be"
                   " reproduced and  fixed.")
    model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

    # One estimator is refitted on square, tall and wide inputs in turn.
    board_shapes = ((30, 30), (40, 30), (30, 40))
    for board_shape in board_shapes:
        S, rows, cols = make_checkerboard(board_shape, 3, noise=0,
                                          random_state=0)
        model.fit(S)
        truth = (rows, cols)
        assert_equal(consensus_score(model.biclusters_, truth), 1)
Exemplo n.º 11
0
    def test_Bicluster(self):
        """Compare ModelFrame bicluster estimators against the plain ones.

        Both spectral estimators are built through ``df.cluster.bicluster``
        and through ``cluster.bicluster``, fitted on the same checkerboard,
        and their ``biclusters_`` compared.
        """
        data, rows, columns = datasets.make_checkerboard(
            shape=(300, 300),
            n_clusters=5,
            noise=10,
            shuffle=True,
            random_state=self.random_state,
        )
        df = pdml.ModelFrame(data)

        for estimator_name in ('SpectralBiclustering', 'SpectralCoclustering'):
            frame_factory = getattr(df.cluster.bicluster, estimator_name)
            frame_model = frame_factory(3, random_state=self.random_state)
            plain_factory = getattr(cluster.bicluster, estimator_name)
            plain_model = plain_factory(3, random_state=self.random_state)

            df.fit(frame_model)
            plain_model.fit(data)

            self.assert_numpy_array_almost_equal(frame_model.biclusters_,
                                                 plain_model.biclusters_)
Exemplo n.º 12
0
def test_spectral_biclustering():
    # Exercise the Kluger methods on a noisy checkerboard, changing one
    # non-default parameter at a time, for dense and CSR input.
    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                      random_state=0)
    truth = (rows, cols)

    non_default_params = {'method': ['scale', 'log'],
                          'svd_method': ['arpack'],
                          'n_svd_vecs': [20],
                          'mini_batch': [True]}
    # Flatten to (name, value) pairs, keeping the dict's iteration order.
    overrides = [(name, value)
                 for name, values in non_default_params.items()
                 for value in values]

    for mat in (S, csr_matrix(S)):
        for name, value in overrides:
            model = SpectralBiclustering(n_clusters=3, n_init=3,
                                         init='k-means++', random_state=0)
            model.set_params(**{name: value})

            if issparse(mat) and model.get_params().get('method') == 'log':
                # cannot take log of sparse matrix
                with pytest.raises(ValueError):
                    model.fit(mat)
                continue

            model.fit(mat)

            # 9 biclusters expected; every row and column sits in 3 of them.
            assert model.rows_.shape == (9, 30)
            assert model.columns_.shape == (9, 30)
            row_membership = model.rows_.sum(axis=0)
            assert_array_equal(row_membership, np.repeat(3, 30))
            column_membership = model.columns_.sum(axis=0)
            assert_array_equal(column_membership, np.repeat(3, 30))
            assert consensus_score(model.biclusters_, truth) == 1

            _test_shape_indices(model)
Exemplo n.º 13
0
def test_spectral_biclustering():
    # Exercise the Kluger methods on a noisy checkerboard, changing one
    # non-default parameter at a time, for dense and CSR input.
    S, rows, cols = make_checkerboard((30, 30), 3, noise=0.5,
                                      random_state=0)
    reference = (rows, cols)

    non_default_params = {'method': ['scale', 'log'],
                          'svd_method': ['arpack'],
                          'n_svd_vecs': [20],
                          'mini_batch': [True]}
    # Flatten to (name, value) pairs, keeping the dict's iteration order.
    overrides = [(name, value)
                 for name, values in non_default_params.items()
                 for value in values]

    for mat in (S, csr_matrix(S)):
        for name, value in overrides:
            model = SpectralBiclustering(n_clusters=3, n_init=3,
                                         init='k-means++', random_state=0)
            model.set_params(**{name: value})

            if issparse(mat) and model.get_params().get('method') == 'log':
                # cannot take log of sparse matrix
                assert_raises(ValueError, model.fit, mat)
                continue

            model.fit(mat)

            # 9 biclusters expected; every row and column sits in 3 of them.
            assert_equal(model.rows_.shape, (9, 30))
            assert_equal(model.columns_.shape, (9, 30))
            rows_per_sample = model.rows_.sum(axis=0)
            assert_array_equal(rows_per_sample, np.repeat(3, 30))
            columns_per_feature = model.columns_.sum(axis=0)
            assert_array_equal(columns_per_feature, np.repeat(3, 30))
            assert_equal(consensus_score(model.biclusters_, reference), 1)

            _test_shape_indices(model)
Exemplo n.º 14
0
from sklearn import datasets
import matplotlib.pyplot as plt

# Generate a small, unshuffled, noise-free 10 x 10 checkerboard with 4 clusters
# and block values requested between 10 and 100.
X, rows, cols = datasets.make_checkerboard(
    shape=(10, 10),
    n_clusters=4,
    noise=0.0,
    minval=10,
    maxval=100,
    shuffle=False,
    random_state=None,
)

# Dump the data matrix followed by the row and column indicator arrays.
for _generated in (X, rows, cols):
    print(_generated)

# Show the data matrix as an image.
plt.matshow(X)
plt.show()
Exemplo n.º 15
0
        temppath = tempimage()
        plt.savefig(temppath, dpi=dpi)
        dx,dy = imagesize(temppath)
        w = min(W,dx)
        image(temppath,imgx,imgy,width=w)
        imgy = imgy + dy + 20
        os.remove(temppath)
        size(W, HEIGHT+dy+40)
else:
    def pltshow(mplpyplot):
        mplpyplot.show()
# nodebox section end

# Ground truth: an unshuffled 300 x 300 checkerboard with a 4 x 3 cluster grid.
n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
    shape=(300, 300),
    n_clusters=n_clusters,
    noise=10,
    shuffle=False,
    random_state=0,
)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# Shuffle the data; the returned index arrays are used below to permute
# the ground-truth indicators the same way.
data, row_idx, col_idx = sg._shuffle(data, random_state=0)
plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

# Fit on the shuffled data and score against the permuted ground truth.
model = SpectralBiclustering(
    n_clusters=n_clusters,
    method='log',
    random_state=0,
)
model.fit(data)
shuffled_truth = (rows[:, row_idx], columns[:, col_idx])
score = consensus_score(model.biclusters_, shuffled_truth)
Exemplo n.º 16
0
 def checkerboard(self, *args, **kwargs):
     """Generate a checkerboard dataset and return it as an ``(X, y)`` pair.

     All arguments are forwarded unchanged to ``make_checkerboard``.

     ``make_checkerboard`` returns three values (data, row indicators,
     column indicators), so the previous two-name unpacking always raised
     ``ValueError``. The two-element return is kept: ``y`` is now the
     ``(rows, cols)`` indicator pair.
     """
     X, rows, cols = make_checkerboard(*args, **kwargs)
     # NOTE(review): bundling both indicator arrays into ``y`` is a choice
     # made to preserve the (X, y) shape -- confirm against callers.
     y = (rows, cols)
     return X, y
Exemplo n.º 17
0
@author: nova
"""

import sklearn.datasets as skd
import pandas as pd
import numpy as np

# Matrix shapes and cluster grids for the two synthetic tables.
shape_host = (16, 3000)
shape_sym = (16, 2000)
cluster_host = (4, 4)
cluster_sym = (4, 4)

# Unshuffled noisy checkerboards, each with its own fixed seed.
host, row_host, col_host = skd.make_checkerboard(
    shape_host, cluster_host, noise=10.0, shuffle=False, random_state=2020)

sym, row_sym, col_sym = skd.make_checkerboard(
    shape_sym, cluster_sym, noise=10.0, shuffle=False, random_state=212)

# Round the values, then draw every column once without replacement
# (n equals the column count), i.e. a seeded column permutation.
host_ = pd.DataFrame(np.round(host)).sample(
    n=3000, replace=False, random_state=12, axis=1)
sym_ = pd.DataFrame(np.round(sym))
sym_ = sym_.sample(n=2000, replace=False, random_state=321, axis=1)
from sklearn.datasets import make_checkerboard
from matplotlib import pyplot
import numpy

from generateCsv import *

# Number of checkerboard clusters and the shape of the generated matrix.
n_clusters = 2
shape = (100, 100)

# When False, the data is flattened into coordinate rows instead of plotted.
PLOT_DATA = False

if __name__ == "__main__":

    data, rows, columns = make_checkerboard(shape,
                                            n_clusters,
                                            noise=0,
                                            shuffle=True)

    if not PLOT_DATA:

        # Map every distinct cell value to a consecutive integer id,
        # numbered in order of first appearance (row-major scan).
        resMap = {}
        for i in range(shape[0]):
            for j in range(shape[1]):
                val = data[i][j]
                if (val not in resMap):
                    resMap[val] = len(resMap)

        # One row per cell: 1-based (row, column) coordinates recovered
        # from the flat index. Floor division is required here: with "/"
        # Python 3 yields fractional row coordinates (1.01, 1.02, ...),
        # whereas "//" gives whole row numbers on both Python 2 and 3.
        X = numpy.zeros((shape[0] * shape[1], 2))
        for i in range(X.shape[0]):
            X[i][0] = (i // shape[1]) + 1
            X[i][1] = (i % shape[1]) + 1
Exemplo n.º 19
0
def _sk_options(kwargs, defaults, *names):
    """Build the keyword arguments for one generator call.

    For every name the caller-supplied value in ``kwargs`` wins; otherwise
    the attribute of the same name on ``defaults`` is used.
    """
    return {name: kwargs.get(name, getattr(defaults, name)) for name in names}


def getSKData(style='timeseries', n_samples=1, **kwargs):
    """Generate a synthetic dataset with the generator selected by ``style``.

    Parameters
    ----------
    style : Style or str
        Dataset kind. A string is lower-cased and converted through
        ``Style(...)`` before dispatching.
    n_samples : int
        Passed to every generator that takes a sample count (the bicluster
        and checkerboard generators use ``shape`` instead).
    **kwargs
        Per-generator options. Any option not supplied falls back to the
        attribute of the same name on the matching ``*Args`` holder.

    Returns
    -------
    Whatever the selected generator returns, or ``None`` when ``style``
    matches none of the branches below.

    Notes
    -----
    Options are passed by keyword rather than by position: scikit-learn >= 1.0
    declares most generator parameters keyword-only, so positional calls
    raise ``TypeError`` there. Keywords behave the same on older releases.
    """
    if isinstance(style, str):
        style = Style(style.lower())
    if style == Style.REGRESSION:
        return make_regression(
            n_samples=n_samples,
            **_sk_options(kwargs, RegressionArgs,
                          'n_features', 'n_informative', 'n_targets', 'bias',
                          'effective_rank', 'tail_strength', 'noise',
                          'shuffle', 'coef', 'random_state'))
    elif style == Style.BLOBS:
        return make_blobs(
            n_samples=n_samples,
            **_sk_options(kwargs, BlobsArgs,
                          'n_features', 'centers', 'cluster_std',
                          'center_box', 'shuffle', 'random_state'))
    elif style == Style.CLASSIFICATION:
        return make_classification(
            n_samples=n_samples,
            **_sk_options(kwargs, ClassificationArgs,
                          'n_features', 'n_informative', 'n_redundant',
                          'n_repeated', 'n_classes', 'n_clusters_per_class',
                          'weights', 'flip_y', 'class_sep', 'hypercube',
                          'shift', 'scale', 'shuffle', 'random_state'))
    elif style == Style.MULTILABEL:
        return make_multilabel_classification(
            n_samples=n_samples,
            **_sk_options(kwargs, MultilabelClassificationArgs,
                          'n_features', 'n_classes', 'n_labels', 'length',
                          'allow_unlabeled', 'sparse', 'return_indicator',
                          'return_distributions', 'random_state'))
    elif style == Style.GAUSSIAN:
        return make_gaussian_quantiles(
            n_samples=n_samples,
            **_sk_options(kwargs, GaussianArgs,
                          'n_features', 'mean', 'cov', 'n_classes',
                          'shuffle', 'random_state'))
    elif style == Style.HASTIE:
        return make_hastie_10_2(
            n_samples=n_samples,
            **_sk_options(kwargs, HastieArgs, 'random_state'))
    elif style == Style.CIRCLES:
        return make_circles(
            n_samples=n_samples,
            **_sk_options(kwargs, CirclesArgs,
                          'shuffle', 'noise', 'random_state', 'factor'))
    elif style == Style.MOONS:
        return make_moons(
            n_samples=n_samples,
            **_sk_options(kwargs, MoonsArgs,
                          'shuffle', 'noise', 'random_state'))
    elif style == Style.BICLUSTERS:
        return make_biclusters(
            **_sk_options(kwargs, BiclusterArgs,
                          'shape', 'n_clusters', 'noise', 'minval',
                          'maxval', 'shuffle', 'random_state'))
    elif style == Style.SCURVE:
        return make_s_curve(
            n_samples=n_samples,
            **_sk_options(kwargs, SCurveArgs, 'noise', 'random_state'))
    elif style == Style.CHECKER:
        return make_checkerboard(
            **_sk_options(kwargs, CheckerArgs,
                          'shape', 'n_clusters', 'noise', 'minval',
                          'maxval', 'shuffle', 'random_state'))
    elif style == Style.FRIEDMAN:
        return make_friedman1(
            n_samples=n_samples,
            **_sk_options(kwargs, FriedmanArgs,
                          'n_features', 'noise', 'random_state'))
    elif style == Style.FRIEDMAN2:
        return make_friedman2(
            n_samples=n_samples,
            **_sk_options(kwargs, Friedman2Args, 'noise', 'random_state'))
    elif style == Style.FRIEDMAN3:
        return make_friedman3(
            n_samples=n_samples,
            **_sk_options(kwargs, Friedman3Args, 'noise', 'random_state'))
    # No branch matched (this includes the default style): keep the
    # original behaviour of returning None rather than raising.
    return None
Exemplo n.º 20
0
def _generator_options(kwargs, defaults, *names):
    """Build the keyword arguments for one generator call.

    For every name the caller-supplied value in ``kwargs`` wins; otherwise
    the attribute of the same name on ``defaults`` is used.
    """
    return {name: kwargs.get(name, getattr(defaults, name)) for name in names}


def getSKData(style='timeseries', as_dataframe=False, n_samples=10, **kwargs):
    """Generate a synthetic dataset with the generator selected by ``style``.

    Parameters
    ----------
    style : str
        Dataset kind, compared against the literal names handled below
        ('regression', 'blobs', 'classification', ...).
    as_dataframe : bool
        Only consulted for ``style == 'biclusters'``: when true, the data
        matrix and the transposed row indicators are concatenated
        column-wise into a single DataFrame.
    n_samples : int
        Passed to every generator that takes a sample count (the bicluster
        and checkerboard generators use ``shape`` instead).
    **kwargs
        Per-generator options. Any option not supplied falls back to the
        attribute of the same name on the matching ``*Args`` holder.

    Returns
    -------
    Whatever the selected generator returns (or the DataFrame described
    above), or ``None`` when ``style`` matches none of the branches.

    Notes
    -----
    Options are passed by keyword rather than by position: scikit-learn >= 1.0
    declares most generator parameters keyword-only, so positional calls
    raise ``TypeError`` there. Keywords behave the same on older releases.
    """
    if style == 'regression':
        return make_regression(
            n_samples=n_samples,
            **_generator_options(kwargs, RegressionArgs,
                                 'n_features', 'n_informative', 'n_targets',
                                 'bias', 'effective_rank', 'tail_strength',
                                 'noise', 'shuffle', 'coef', 'random_state'))
    elif style == 'blobs':
        return make_blobs(
            n_samples=n_samples,
            **_generator_options(kwargs, BlobsArgs,
                                 'n_features', 'centers', 'cluster_std',
                                 'center_box', 'shuffle', 'random_state'))
    elif style == 'classification':
        return make_classification(
            n_samples=n_samples,
            **_generator_options(kwargs, ClassificationArgs,
                                 'n_features', 'n_informative', 'n_redundant',
                                 'n_repeated', 'n_classes',
                                 'n_clusters_per_class', 'weights', 'flip_y',
                                 'class_sep', 'hypercube', 'shift', 'scale',
                                 'shuffle', 'random_state'))
    elif style == 'multilabel':
        return make_multilabel_classification(
            n_samples=n_samples,
            **_generator_options(kwargs, MultilabelClassificationArgs,
                                 'n_features', 'n_classes', 'n_labels',
                                 'length', 'allow_unlabeled', 'sparse',
                                 'return_indicator', 'return_distributions',
                                 'random_state'))
    elif style == 'gaussian':
        return make_gaussian_quantiles(
            n_samples=n_samples,
            **_generator_options(kwargs, GaussianArgs,
                                 'n_features', 'mean', 'cov', 'n_classes',
                                 'shuffle', 'random_state'))
    elif style == 'hastie':
        return make_hastie_10_2(
            n_samples=n_samples,
            **_generator_options(kwargs, HastieArgs, 'random_state'))
    elif style == 'circles':
        return make_circles(
            n_samples=n_samples,
            **_generator_options(kwargs, CirclesArgs,
                                 'shuffle', 'noise', 'random_state', 'factor'))
    elif style == 'moons':
        return make_moons(
            n_samples=n_samples,
            **_generator_options(kwargs, MoonsArgs,
                                 'shuffle', 'noise', 'random_state'))
    elif style == 'biclusters':
        x = make_biclusters(
            **_generator_options(kwargs, BiclusterArgs,
                                 'shape', 'n_clusters', 'noise', 'minval',
                                 'maxval', 'shuffle', 'random_state'))
        if as_dataframe:
            # x[0] is the data matrix, x[1] the row indicators; transposing
            # the indicators lines them up row-by-row with the data.
            return pd.concat([pd.DataFrame(x[0]), pd.DataFrame(x[1].T)], axis=1)
        return x
    elif style == 'scurve':
        return make_s_curve(
            n_samples=n_samples,
            **_generator_options(kwargs, SCurveArgs, 'noise', 'random_state'))
    elif style == 'checker':
        return make_checkerboard(
            **_generator_options(kwargs, CheckerArgs,
                                 'shape', 'n_clusters', 'noise', 'minval',
                                 'maxval', 'shuffle', 'random_state'))
    elif style == 'friedman':
        return make_friedman1(
            n_samples=n_samples,
            **_generator_options(kwargs, FriedmanArgs,
                                 'n_features', 'noise', 'random_state'))
    elif style == 'friedman2':
        return make_friedman2(
            n_samples=n_samples,
            **_generator_options(kwargs, Friedman2Args,
                                 'noise', 'random_state'))
    elif style == 'friedman3':
        return make_friedman3(
            n_samples=n_samples,
            **_generator_options(kwargs, Friedman3Args,
                                 'noise', 'random_state'))
    # No branch matched (this includes the default 'timeseries'): keep the
    # original behaviour of returning None rather than raising.
    return None
Exemplo n.º 21
0
import numpy as np
from matplotlib import pyplot as plt

from sklearn.datasets import make_checkerboard
from sklearn.cluster import SpectralBiclustering
from sklearn.metrics import consensus_score


# Ground truth: an unshuffled 300 x 300 checkerboard with a 4 x 3 cluster grid.
n_clusters = (4, 3)
data, rows, columns = make_checkerboard(
    shape=(300, 300),
    n_clusters=n_clusters,
    noise=10,
    shuffle=False,
    random_state=0,
)

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Original dataset")

# Permute rows first, then columns, with indices drawn from one seeded RNG.
rng = np.random.RandomState(0)
row_idx = rng.permutation(data.shape[0])
col_idx = rng.permutation(data.shape[1])
data = data[row_idx]
data = data[:, col_idx]

plt.matshow(data, cmap=plt.cm.Blues)
plt.title("Shuffled dataset")

# Fit on the shuffled data and score against the ground-truth indicators
# permuted with the same indices.
model = SpectralBiclustering(
    n_clusters=n_clusters,
    method='log',
    random_state=0,
)
model.fit(data)
shuffled_truth = (rows[:, row_idx], columns[:, col_idx])
score = consensus_score(model.biclusters_, shuffled_truth)
Exemplo n.º 22
0
    data = np.vstack((class_a, class_b))

    a_sign = np.ones(int(size / 2))
    b_sign = np.ones(int(size / 2)) * -1.0
    class_signs = np.hstack((a_sign, b_sign))
elif data_set == 'moon class':
    ret = datasets.make_moons(size, noise=0.1)
    data = ret[0]
    class_signs = np.array([-1 if p == 0 else 1 for p in ret[1]])

elif data_set == 'checkerboard class':
    checkerboard_shape = (size, dim)
    checkerboard_class_number = 2
    ret = datasets.make_checkerboard(checkerboard_shape,
                                     checkerboard_class_number,
                                     noise=0.1,
                                     minval=0.0,
                                     maxval=1.0)
    data = ret[0]
    class_signs = ret[1][0]
    class_signs = np.array([-1 if not p else 1 for p in class_signs])

# Every data set except 'line' is rescaled before plotting. The comparison
# must use != (value equality): "is not" against a string literal tests
# object identity, which depends on interpreter string interning and emits
# a SyntaxWarning on Python 3.8+.
if data_set != 'line':
    data = scale_data(data, dim, scale)

print('plot of data set: ')
plot_dataset(data, dim, 'figures/dataPlot_' + data_set)

########### GRID EVALUATIONS

### Standard Combi