Пример #1
0
    def makeExample(self, dataset=None, **opts):
        """
        Make an example data set for testing analyses.

        Options include 'pca', 'factor', 'kmeans', 'ica', 'sources'
        See thunder.utils.datasets for detailed options.

        Parameters
        ----------
        dataset : str
            Which dataset to generate

        Returns
        -------
        data : RDD of (tuple, array) pairs
            Generated dataset

        """
        from thunder.utils.datasets import DATASET_MAKERS

        if dataset is None:
            return sorted(DATASET_MAKERS.keys())

        checkParams(dataset, DATASET_MAKERS.keys())

        return DataSets.make(self._sc, dataset, **opts)
Пример #2
0
    def makeExample(self, dataset=None, **opts):
        """
        Make an example data set for testing analyses.

        Options include 'pca', 'factor', 'kmeans', 'ica', 'sources'
        See thunder.utils.datasets for detailed options.

        Parameters
        ----------
        dataset : str
            Which dataset to generate

        Returns
        -------
        data : RDD of (tuple, array) pairs
            Generated dataset

        """
        from thunder.utils.datasets import DATASET_MAKERS

        if dataset is None:
            return sorted(DATASET_MAKERS.keys())

        checkParams(dataset, DATASET_MAKERS.keys())

        return DataSets.make(self._sc, dataset, **opts)
Пример #3
0
    def test_kmeans_k2(self):
        """ Test k=2 also with more points"""

        data, centerstrue = DataSets.make(self.sc, "kmeans",
                                          k=2, nrecords=50, npartitions=5, seed=42, returnparams=True)
        centerstrue = KMeansModel(centerstrue)

        model = KMeans(k=2, maxIterations=20).fit(data)

        labels = array(model.predict(data).values().collect())
        labelstrue = array(centerstrue.predict(data).values().collect())

        assert(array_equal(labels, labelstrue) or array_equal(labels, 1 - labelstrue))
Пример #4
0
    def test_kmeans_k2(self):
        """ Test k=2 also with more points"""

        data, centerstrue = DataSets.make(self.sc,
                                          "kmeans",
                                          k=2,
                                          nrecords=50,
                                          npartitions=5,
                                          seed=42,
                                          returnparams=True)
        centerstrue = KMeansModel(centerstrue)

        model = KMeans(k=2, maxIterations=20).fit(data)

        labels = array(model.predict(data).values().collect())
        labelstrue = array(centerstrue.predict(data).values().collect())

        assert (array_equal(labels, labelstrue)
                or array_equal(labels, 1 - labelstrue))
    def test_ica(self):

        random.seed(42)
        data, s, a = DataSets.make(self.sc, "ica", nrows=100, returnParams=True)

        ica = ICA(c=2, svdMethod="direct", seed=1)
        ica.fit(data)

        s_ = array(ica.sigs.rows().collect())

        # test accurate recovery of original signals
        tol = 0.01
        assert(allclose(abs(corrcoef(s[:, 0], s_[:, 0])[0, 1]), 1, atol=tol)
               or allclose(abs(corrcoef(s[:, 0], s_[:, 1])[0, 1]), 1, atol=tol))
        assert(allclose(abs(corrcoef(s[:, 1], s_[:, 0])[0, 1]), 1, atol=tol)
               or allclose(abs(corrcoef(s[:, 1], s_[:, 1])[0, 1]), 1, atol=tol))

        # test accurate reconstruction from sources
        assert(allclose(array(data.rows().collect()), dot(s_, ica.a.T)))
    def test_ica(self):

        random.seed(42)
        data, s, a = DataSets.make(self.sc, "ica", nrows=100, returnParams=True)

        ica = ICA(c=2, svdMethod="direct", seed=1)
        ica.fit(data)

        s_ = array(ica.sigs.rows().collect())

        # test accurate recovery of original signals
        tol = 0.01
        assert(allclose(abs(corrcoef(s[:, 0], s_[:, 0])[0, 1]), 1, atol=tol)
               or allclose(abs(corrcoef(s[:, 0], s_[:, 1])[0, 1]), 1, atol=tol))
        assert(allclose(abs(corrcoef(s[:, 1], s_[:, 0])[0, 1]), 1, atol=tol)
               or allclose(abs(corrcoef(s[:, 1], s_[:, 1])[0, 1]), 1, atol=tol))

        # test accurate reconstruction from sources
        assert(allclose(array(data.rows().collect()), dot(s_, ica.a.T)))
Пример #7
0
    def makeExample(self, dataset, **opts):
        """
        Make an example data set for testing analyses.

        Options include 'pca', 'kmeans', and 'ica'.
        See thunder.utils.datasets for detailed options.

        Parameters
        ----------
        dataset : str
            Which dataset to generate

        Returns
        -------
        data : RDD of (tuple, array) pairs
            Generated dataset

        """
        checkparams(dataset, ['kmeans', 'pca', 'ica'])

        return DataSets.make(self._sc, dataset, **opts)
Пример #8
0
    def makeExample(self, dataset, **opts):
        """
        Make an example data set for testing analyses.

        Options include 'pca', 'kmeans', and 'ica'.
        See thunder.utils.datasets for detailed options.

        Parameters
        ----------
        dataset : str
            Which dataset to generate

        Returns
        -------
        data : RDD of (tuple, array) pairs
            Generated dataset

        """
        checkParams(dataset, ['kmeans', 'pca', 'ica'])

        return DataSets.make(self._sc, dataset, **opts)