示例#1
0
    def test_ordering(self):
        """Check the BinaryWPCA output on two correlated Gaussian classes.

        A pipeline with a single BinaryWPCA step is fitted on 10000 draws per
        class (means [.5, -.5] and [-.5, .5], shared covariance, unit
        weights).  The test then asserts that the covariance of the
        transformed sample is diagonal within ``np.allclose`` tolerance and
        that the Wasserstein distance computed for column 0 is smaller than
        the one computed for column 1.
        """
        n_per_class = 10000
        covariance = [[1., .9], [.9, 1.]]

        pipeline = Pipeline()
        pipeline.add_step('wpca', BinaryWPCA())

        # Class 0 is drawn before class 1 so the global NumPy random stream
        # is consumed in a fixed order.
        class0 = np.random.multivariate_normal(
            [.5, -.5], covariance, n_per_class)
        class1 = np.random.multivariate_normal(
            [-.5, .5], covariance, n_per_class)

        X = np.vstack((class0, class1))
        y = np.concatenate((np.zeros(n_per_class), np.ones(n_per_class)))
        w = np.ones(2 * n_per_class)
        pipeline.fit(X, y, w)

        Xt, _, _ = pipeline.transform(X, y, w)

        def off_diagonal_vanishes(matrix):
            # Remove the diagonal and compare what is left against zeros.
            remainder = matrix - np.diag(np.diagonal(matrix))
            return np.allclose(remainder, np.zeros(matrix.shape))

        self.assertTrue(off_diagonal_vanishes(np.cov(Xt.T)))

        first_class = (y == 0)
        second_class = (y == 1)

        # NOTE(review): the first sample of each distance comes from the
        # untransformed X while the second comes from Xt -- confirm that
        # mixing the two is intended.
        distances = [
            wasserstein_distance(u_values=X[first_class, column],
                                 v_values=Xt[second_class, column],
                                 u_weights=w[first_class],
                                 v_weights=w[second_class])
            for column in (0, 1)
        ]

        self.assertLess(distances[0], distances[1])
示例#2
0
    def test_PCA(self):
        """Check a non-standardizing PCA step on three collinear points.

        Asserts that the input columns are almost perfectly correlated, that
        the fitted ``R`` satisfies ``R R^T = identity``, that targets and
        weights pass through ``transform`` unchanged, and that the covariance
        of the transformed data is 5 in entry (0, 0) and 0 elsewhere.
        """
        X = np.array([[1., 2.], [2., 4.], [3., 6.]])
        y = np.zeros(3)
        w = np.array([1., 1., 1.])

        pipeline = Pipeline()
        pipeline.add_step('pca', PCA(standardize=False))

        # Sanity check on the fixture: both columns move together.
        correlation = np.corrcoef(X[:, 0], X[:, 1])[0][1]
        self.assertGreater(abs(correlation), .99)

        pipeline.fit(X, y, w)

        rotation = pipeline.get_step('pca').R
        gram = rotation.dot(rotation.T)
        self.assertTrue(np.allclose(gram, np.identity(rotation.shape[0])))

        Xt, yt, wt = pipeline.transform(X, y, w)
        self.assertTrue(np.allclose(y, yt))
        self.assertTrue(np.allclose(w, wt))

        covariance = np.cov(Xt.T)
        expected_entries = (
            ((0, 0), 5.),
            ((1, 0), 0.),
            ((0, 1), 0.),
            ((1, 1), 0.),
        )
        for (row, col), expected in expected_entries:
            self.assertAlmostEqual(covariance[row, col], expected)
示例#3
0
    def test_fit_decoupling(self):
        """Check that a Center step on columns 2 and 4 reuses its fit on new data.

        The step is fitted on data whose columns 2 and 4 have means 2 and 4,
        then applied to an all-ones array.  The asserted column means of the
        result (-1 and -3 for columns 2 and 4, 1 for columns 0, 1 and 3)
        correspond to subtracting the fit-time means in the selected columns
        only.  Only the first five columns are checked.
        """
        indices = [2, 4]
        pipeline = Pipeline()
        pipeline.add_step('center', Center(), indices)

        # Random filler everywhere except the two selected columns, which get
        # deterministic ramps with known means.
        ramp = np.arange(5).astype(float)
        X = np.random.rand(5, 10)
        X[:, indices[0]] = ramp
        X[:, indices[1]] = ramp * 2

        fit_means = X[:, indices].mean(axis=0)
        for observed, expected in zip(fit_means, (2., 4.)):
            self.assertAlmostEqual(observed, expected)

        y = np.arange(5)
        w = np.arange(5)

        pipeline.fit(X, y, w)

        Xt, _, _ = pipeline.transform(np.ones_like(X), y, w)

        column_means = Xt.mean(axis=0)
        for column, expected in enumerate((1., 1., -1., 1., -3.)):
            self.assertAlmostEqual(column_means[column], expected)
示例#4
0
    def test_weighting(self):
        """Fit and apply a non-standardizing PCA step on two weighted datasets.

        The same pipeline is fitted and applied first to ``(X1, y1, w1)`` and
        then to ``(X2, y2, w2)``; column means and sample standard deviations
        are then compared pairwise.
        """
        pipeline = Pipeline()
        pipeline.add_step('pca', PCA(standardize=False))

        X1 = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.], [5., 10.]])
        X2 = np.array([[2., 4.], [3., 6.], [3., 6.], [4., 8.], [5., 10.]])

        # Both datasets use the same weights and targets, held in separate
        # arrays.
        w1 = np.array([0., 1., 2., 1., 1.])
        y1 = np.array([0, 0, 0, 1, 2])
        w2 = w1.copy()
        y2 = y1.copy()

        pipeline.fit(X1, y1, w1)
        Xt1, _, _ = pipeline.transform(X1, y1, w1)
        means1 = Xt1.mean(axis=0)
        stds1 = Xt1.std(axis=0, ddof=1)

        pipeline.fit(X2, y2, w2)
        Xt2, _, _ = pipeline.transform(X2, y2, w2)
        # NOTE(review): the second set of statistics is derived from Xt1, not
        # Xt2, so Xt2 is never inspected and the assertions below compare
        # values with themselves.  Presumably Xt2 was intended -- confirm the
        # intended fixture and comparison before changing this.
        means2 = Xt1.mean(axis=0)
        stds2 = Xt1.std(axis=0, ddof=1)

        # Means are checked first, then standard deviations.
        pairs = list(zip(means1, means2)) + list(zip(stds1, stds2))
        for first, second in pairs:
            self.assertAlmostEqual(first, second)
示例#5
0
    def test_indices_wildcard(self):
        """Check an Overwrite(value=0) step registered with the '*' indices.

        After fitting and transforming through the fixture helpers, the
        result is expected to match ``self.X`` with every entry set to 0.
        """
        overwrite_step = Overwrite(value=0)
        pipeline = Pipeline()
        pipeline.add_step('overwrite', overwrite_step, '*')

        self.fit(pipeline)
        transformed = self.transform(pipeline)

        # Zero the fixture in place so it serves as the expected result.
        self.X[:, :] = 0
        matches = np.allclose(self.X, transformed)
        self.assertTrue(matches)
示例#6
0
    def test_unit_step(self):
        """Check a plain BaseStep registered on columns 2 and 4.

        Asserts that the pipeline reports the step as present and that the
        result of ``self.transform(pipeline)`` matches ``self.X``.
        """
        step_name = 'unit'
        pipeline = Pipeline()
        pipeline.add_step(step_name, BaseStep('unit'), [2, 4])

        registered = pipeline.has_step(step_name)
        self.assertTrue(registered)

        self.fit(pipeline)
        transformed = self.transform(pipeline)

        unchanged = np.allclose(self.X, transformed)
        self.assertTrue(unchanged)
示例#7
0
    def test_Overwriter(self):
        """Check an Overwrite(value=0) step restricted to columns 2 and 4.

        After fitting and transforming through the fixture helpers, the
        result is expected to match ``self.X`` with those two columns set
        to 0.
        """
        indices = [2, 4]
        overwrite_step = Overwrite(value=0)

        pipeline = Pipeline()
        pipeline.add_step('overwrite', overwrite_step, indices)

        self.fit(pipeline)
        transformed = self.transform(pipeline)

        # Zero the selected columns of the fixture in place to build the
        # expected result.
        self.X[:, indices] = 0
        matches = np.allclose(self.X, transformed)
        self.assertTrue(matches)
示例#8
0
    def test_standardize(self):
        """Check that PCA(standardize=True) yields zero-mean, unit-std columns.

        The step is fitted and applied on five collinear points with unit
        weights.  Each transformed column must have mean 0 and sample
        standard deviation (``ddof=1``) 1.
        """
        X = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.], [5., 10.]])
        y = np.array([0, 0, 0, 1, 2])
        w = np.array([1., 1., 1., 1., 1.])

        pipeline = Pipeline()
        pipeline.add_step('pca', PCA(standardize=True))
        pipeline.fit(X, y, w)

        Xt, _, _ = pipeline.transform(X, y, w)

        column_means = Xt.mean(axis=0)
        column_stds = Xt.std(axis=0, ddof=1)
        # Mean and std are checked column by column, mean first.
        for column_mean, column_std in zip(column_means, column_stds):
            self.assertAlmostEqual(column_mean, 0.)
            self.assertAlmostEqual(column_std, 1.)
示例#9
0
    def test_PCA_ignore(self):
        """Check PCA(ignore=[1, 2], standardize=False) on five collinear points.

        After fit and transform, the covariance is computed only over rows
        whose target is neither 1 nor 2.  It must be 5 in entry (0, 0) and 0
        elsewhere.
        """
        X = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.], [5., 10.]])
        y = np.array([0, 0, 0, 1, 2])
        w = np.array([1., 1., 1., 1., 1.])

        pipeline = Pipeline()
        pipeline.add_step('pca', PCA(ignore=[1, 2], standardize=False))
        pipeline.fit(X, y, w)

        Xt, _, _ = pipeline.transform(X, y, w)

        # Keep only rows whose target is outside the ignored values.
        kept = ~((y == 1) | (y == 2))
        covariance = np.cov(Xt[kept].T)

        expected_entries = (
            ((0, 0), 5.),
            ((1, 0), 0.),
            ((0, 1), 0.),
            ((1, 1), 0.),
        )
        for (row, col), expected in expected_entries:
            self.assertAlmostEqual(covariance[row, col], expected)
示例#10
0
    def test_standardize(self):
        """Check the fitted parameters and output of a Standardizer step.

        With weights [2, 3, 3, 4] on four collinear points, the fitted
        ``mean`` must be (2.75, 5.5) and the fitted ``std`` must be
        (sqrt(14.25) / 3, sqrt(57) / 3).  The transformed data must equal
        the input shifted and scaled by those values.
        """
        expected_mean = (2.75, 5.5)
        expected_std = (np.sqrt(14.25) / 3., np.sqrt(57.) / 3.)

        X = np.array([[1., 2.], [2., 4.], [3., 6.], [4., 8.]])
        y = np.zeros(4)
        w = np.array([2., 3., 3., 4.])

        pipeline = Pipeline()
        pipeline.add_step('std', Standardizer())
        pipeline.fit(X, y, w)

        step = pipeline.get_step('std')
        # Means are checked first, then standard deviations.
        for column, value in enumerate(expected_mean):
            self.assertAlmostEqual(step.mean[column], value)
        for column, value in enumerate(expected_std):
            self.assertAlmostEqual(step.std[column], value)

        Xt, _, _ = pipeline.transform(X, y, w)

        # Build the expected result from fresh literals rather than from X.
        first_column = np.array([1., 2., 3., 4.])
        second_column = np.array([2., 4., 6., 8.])
        expected = np.column_stack((
            (first_column - expected_mean[0]) / expected_std[0],
            (second_column - expected_mean[1]) / expected_std[1],
        ))

        self.assertTrue(np.allclose(expected, Xt))