def makeExample(self, dataset=None, **opts):
    """
    Generate an example data set for testing analyses, or list what is available.

    When called without a dataset name, no data is generated; the sorted
    names of the entries in DATASET_MAKERS are returned instead.
    Options include 'pca', 'factor', 'kmeans', 'ica', 'sources'.
    See thunder.utils.datasets for detailed options.

    Parameters
    ----------
    dataset : str, optional, default = None
        Which dataset to generate. Checked against the keys of
        DATASET_MAKERS via checkParams before generation.

    opts : keyword arguments
        Passed through unchanged to DataSets.make.

    Returns
    -------
    data : RDD of (tuple, array) pairs
        Generated dataset, as returned by DataSets.make. If dataset is
        None, a sorted list of the available dataset names instead.
    """
    # Imported here rather than at module level, as in the original code.
    from thunder.utils.datasets import DATASET_MAKERS

    available = DATASET_MAKERS.keys()
    if dataset is not None:
        checkParams(dataset, available)
        return DataSets.make(self._sc, dataset, **opts)
    return sorted(available)
def test_kmeans_k2(self):
    """
    Test k=2 with more points.

    Fits KMeans to a generated "kmeans" data set and compares its predicted
    labels against those of a KMeansModel built from the parameters that
    DataSets.make returned alongside the data.
    """
    data, trueCenters = DataSets.make(
        self.sc, "kmeans", k=2, nrecords=50, npartitions=5, seed=42, returnparams=True)
    referenceModel = KMeansModel(trueCenters)
    fittedModel = KMeans(k=2, maxIterations=20).fit(data)

    predicted = array(fittedModel.predict(data).values().collect())
    expected = array(referenceModel.predict(data).values().collect())

    # Accept the labels either as-is or flipped via (1 - label); presumably
    # labels are 0/1, so this tolerates the two clusters being numbered in
    # the opposite order.
    assert array_equal(predicted, expected) or array_equal(predicted, 1 - expected)
def test_kmeans_k2(self):
    """
    Test k=2 with more points.

    The labels predicted by a freshly fitted KMeans must agree with the
    labels predicted by a KMeansModel constructed from the parameters
    returned by DataSets.make, either directly or after flipping them
    with (1 - label).
    """
    generated = DataSets.make(self.sc, "kmeans", k=2, nrecords=50, npartitions=5,
                              seed=42, returnparams=True)
    data, generatingCenters = generated

    truthModel = KMeansModel(generatingCenters)
    learnedModel = KMeans(k=2, maxIterations=20).fit(data)

    learnedLabels = array(learnedModel.predict(data).values().collect())
    truthLabels = array(truthModel.predict(data).values().collect())

    # Only fall back to the flipped comparison when the direct one fails.
    if not array_equal(learnedLabels, truthLabels):
        assert array_equal(learnedLabels, 1 - truthLabels)
def test_ica(self):
    """
    Fit ICA to a generated "ica" data set and check the result.

    Two things are verified: each column of the signals returned by
    DataSets.make correlates (in absolute value) with one of the two
    estimated signal columns to within a tolerance of 1, and the data is
    reproduced by multiplying the estimated signals with the transpose of
    the fitted ica.a.
    """
    random.seed(42)
    data, s, _ = DataSets.make(self.sc, "ica", nrows=100, returnParams=True)

    ica = ICA(c=2, svdMethod="direct", seed=1)
    ica.fit(data)
    estimated = array(ica.sigs.rows().collect())

    tol = 0.01

    def recovered(i, j):
        # True when |corr(original column i, estimated column j)| is within tol of 1.
        return allclose(abs(corrcoef(s[:, i], estimated[:, j])[0, 1]), 1, atol=tol)

    # test accurate recovery of original signals; absolute correlation is
    # used and either estimated column may match, so sign and order are free
    for i in (0, 1):
        assert recovered(i, 0) or recovered(i, 1)

    # test accurate reconstruction from sources
    original = array(data.rows().collect())
    assert allclose(original, dot(estimated, ica.a.T))
def makeExample(self, dataset, **opts):
    """
    Generate an example data set for testing analyses.

    Supported names are 'kmeans', 'pca', and 'ica'; the name is checked
    against that list before generation.
    See thunder.utils.datasets for detailed options.

    Parameters
    ----------
    dataset : str
        Which dataset to generate

    opts : keyword arguments
        Passed through unchanged to DataSets.make.

    Returns
    -------
    data : RDD of (tuple, array) pairs
        Generated dataset
    """
    supported = ['kmeans', 'pca', 'ica']
    checkparams(dataset, supported)

    generated = DataSets.make(self._sc, dataset, **opts)
    return generated
def makeExample(self, dataset, **opts):
    """
    Build one of the example data sets used for testing analyses.

    The requested name must be one of 'kmeans', 'pca', or 'ica'; it is
    passed to checkParams together with that list before any data is made.
    See thunder.utils.datasets for detailed options.

    Parameters
    ----------
    dataset : str
        Which dataset to generate

    opts : keyword arguments
        Forwarded as-is to DataSets.make.

    Returns
    -------
    data : RDD of (tuple, array) pairs
        Generated dataset
    """
    allowedNames = ['kmeans', 'pca', 'ica']
    checkParams(dataset, allowedNames)
    return DataSets.make(self._sc, dataset, **opts)