Пример #1
0
class testPCA:
    """
    Checks for the PCA preprocessor, run against a small random dataset
    """
    def setup(self):
        # a fixed seed keeps the fixture identical from run to run
        random_state = np.random.RandomState([1, 2, 3])
        features = as_floatX(random_state.randn(15, 10))
        targets = as_floatX(random_state.randn(15, 1))
        self.dataset = DenseDesignMatrix(X=features, y=targets)
        num_features = self.dataset.get_design_matrix().shape[1]
        # keep one component fewer than there are input features
        self.num_components = num_features - 1

    def test_apply_no_whiten(self):
        """
        After PCA without whitening the features must be decorrelated and
        their variances must appear in strictly decreasing order
        """
        # sut stands for System Under Test
        sut = PCA(self.num_components)
        sut.apply(self.dataset, True)
        covariance = np.cov(self.dataset.get_design_matrix().T)
        dim = covariance.shape[0]

        # blank out the diagonal: whatever is left has to be (almost) zero
        off_diagonal_mask = np.ones(dim) - np.eye(dim)
        np.testing.assert_almost_equal(covariance * off_diagonal_mask,
                                       np.zeros((dim, dim)))

        # each variance must exceed the one that follows it
        variances = np.diag(covariance)
        assert np.all(variances[:-1] > variances[1:])

    def test_apply_whiten(self):
        """
        After PCA with whitening the features must be decorrelated and
        every one of them must have unit variance
        """
        sut = PCA(self.num_components, whiten=True)
        sut.apply(self.dataset, True)
        covariance = np.cov(self.dataset.get_design_matrix().T)
        dim = covariance.shape[0]

        # blank out the diagonal: whatever is left has to be (almost) zero
        off_diagonal_mask = np.ones(dim) - np.eye(dim)
        np.testing.assert_almost_equal(covariance * off_diagonal_mask,
                                       np.zeros((dim, dim)))

        # the diagonal itself has to be all ones
        np.testing.assert_almost_equal(np.diag(covariance), np.ones(dim))

    def test_apply_reduce_num_components(self):
        """
        The design matrix must end up with exactly as many columns as
        components were requested
        """
        requested = self.num_components - 1
        sut = PCA(requested, whiten=True)
        sut.apply(self.dataset, True)

        assert self.dataset.get_design_matrix().shape[1] == requested
Пример #2
0
class testPCA:
    """
    Test suite for the PCA preprocessor
    """
    def setup(self):
        # seeded generator so that the random fixture is reproducible
        rng = np.random.RandomState([1, 2, 3])
        inputs = as_floatX(rng.randn(15, 10))
        labels = as_floatX(rng.randn(15, 1))
        self.dataset = DenseDesignMatrix(X=inputs, y=labels)
        # one component fewer than the number of columns of the design matrix
        self.num_components = self.dataset.get_design_matrix().shape[1] - 1

    def test_apply_no_whiten(self):
        """
        Verifies that the transformed data has a diagonal covariance matrix
        whose diagonal entries strictly decrease
        """
        # sut = System Under Test
        sut = PCA(self.num_components)
        sut.apply(self.dataset, True)
        transformed = self.dataset.get_design_matrix()
        cov = np.cov(transformed.T)
        n = cov.shape[0]

        # off-diagonal entries (diagonal masked out) must be close to zero
        masked = cov * (np.ones(n) - np.eye(n))
        np.testing.assert_almost_equal(masked, np.zeros((n, n)))

        # diagonal entries must be sorted in strictly decreasing order
        diagonal = np.diag(cov)
        assert np.all(diagonal[:-1] > diagonal[1:])

    def test_apply_whiten(self):
        """
        Verifies that the whitened data has a covariance matrix close to
        the identity
        """
        sut = PCA(self.num_components, whiten=True)
        sut.apply(self.dataset, True)
        transformed = self.dataset.get_design_matrix()
        cov = np.cov(transformed.T)
        n = cov.shape[0]

        # off-diagonal entries (diagonal masked out) must be close to zero
        masked = cov * (np.ones(n) - np.eye(n))
        np.testing.assert_almost_equal(masked, np.zeros((n, n)))

        # diagonal entries must all be close to one
        np.testing.assert_almost_equal(np.diag(cov), np.ones(n))

    def test_apply_reduce_num_components(self):
        """
        Verifies that asking for fewer components shrinks the design matrix
        to that many columns
        """
        target_width = self.num_components - 1
        sut = PCA(target_width, whiten=True)
        sut.apply(self.dataset, True)

        width = self.dataset.get_design_matrix().shape[1]
        assert width == target_width
Пример #3
0
    def test_unit_norm(self):
        """ Test that using sqrt_bias = 0.0 and use_std = False
            results in vectors having unit norm """

        num_examples = 5
        num_features = 10

        rng = np.random.RandomState([1, 2, 3])

        X = as_floatX(rng.randn(num_examples, num_features))

        dataset = DenseDesignMatrix(X=X)

        # the setting of subtract_mean is not relevant to the test
        # the test only applies when sqrt_bias = 0.0 and use_std = False
        preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                                   sqrt_bias=0.0,
                                                   use_std=False)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        # Euclidean norm of each row, i.e. of each example
        norms = np.sqrt(np.square(result).sum(axis=1))

        # largest deviation from a norm of exactly one
        max_norm_error = np.abs(norms - 1.).max()

        # single tolerance definition; an earlier, tighter value used to be
        # assigned at the top of the test and was overwritten before use
        tol = 3e-5

        assert max_norm_error < tol
Пример #4
0
def test_init_with_X_or_topo():
    """
    Round-trip checks between the topological view and the design matrix.

    Covers construction from a topological view, construction from a
    design matrix, conversion topo -> design -> topo, and conversion
    design -> topo -> design.
    """
    random_state = np.random.RandomState([1, 2, 3])
    original_topo = random_state.randn(5, 2, 2, 3)

    # topo -> design -> topo
    from_topo = DenseDesignMatrix(topo_view=original_topo)
    design = from_topo.get_design_matrix()
    from_design = DenseDesignMatrix(X=design,
                                    view_converter=from_topo.view_converter)
    recovered_topo = from_design.get_topological_view()
    assert np.allclose(original_topo, recovered_topo)

    # design -> topo -> design, on fresh random data of the same shape
    fresh_design = random_state.randn(*design.shape)
    fresh_topo = from_design.get_topological_view(fresh_design)
    recovered_design = from_design.get_design_matrix(fresh_topo)
    assert np.allclose(fresh_design, recovered_design)
Пример #5
0
    def test_unit_norm(self):
        """ Test that using sqrt_bias = 0.0 and use_std = False
            results in vectors having unit norm """

        num_examples = 5
        num_features = 10

        rng = np.random.RandomState([1, 2, 3])

        X = as_floatX(rng.randn(num_examples, num_features))

        dataset = DenseDesignMatrix(X=X)

        # the setting of subtract_mean is not relevant to the test
        # the test only applies when sqrt_bias = 0.0 and use_std = False
        preprocessor = GlobalContrastNormalization(subtract_mean=False,
                                                   sqrt_bias=0.0,
                                                   use_std=False)

        dataset.apply_preprocessor(preprocessor)

        result = dataset.get_design_matrix()

        # one Euclidean norm per row (per example) of the design matrix
        norms = np.sqrt(np.square(result).sum(axis=1))

        # worst-case distance between a row norm and one
        max_norm_error = np.abs(norms - 1.).max()

        # the tolerance is defined once, right where it is used; the
        # previous extra assignment at the top of the test was never read
        tol = 3e-5

        assert max_norm_error < tol
Пример #6
0
    def next(self):
        """
        Return the next batch drawn from every raw data source.

        The indices produced by the subset iterator are turned into a
        boolean mask, each source is indexed with that mask, passed through
        its conversion function (when one is set) and through the
        preprocessor (when one is set). The result is a tuple with one
        entry per source, or the bare entry when there is exactly one
        source and tuples were not requested.
        """
        batch_indices = self._subset_iterator.next()

        # boolean selection over all examples instead of raw indices
        mask = np.zeros(self.num_examples, dtype=bool)
        mask[batch_indices] = True

        batch = []
        for source, convert in safe_izip(self._raw_data, self._convert):
            try:
                chunk = source[mask]
            except TypeError:
                # fall back to explicit two-axis indexing
                chunk = source[mask, :]
            if convert:
                chunk = convert(chunk)
            if self._preprocessor is not None:
                wrapped = DenseDesignMatrix(X=chunk)
                self._preprocessor.apply(wrapped)
                chunk = wrapped.get_design_matrix()
            assert not np.any(np.isnan(chunk))
            batch.append(chunk)

        batch = tuple(batch)
        if not self._return_tuple and len(batch) == 1:
            return batch[0]
        return batch
def test_init_with_X_or_topo():
    """
    Exercises both ways of constructing a DenseDesignMatrix (from a
    topological view and from a design matrix) and checks that converting
    between the two representations is lossless in both directions.
    """
    rng = np.random.RandomState([1, 2, 3])
    topo_in = rng.randn(5, 2, 2, 3)

    built_from_topo = DenseDesignMatrix(topo_view=topo_in)
    design_matrix = built_from_topo.get_design_matrix()
    built_from_design = DenseDesignMatrix(
        X=design_matrix,
        view_converter=built_from_topo.view_converter)

    # topological view survives the trip through the design matrix
    topo_out = built_from_design.get_topological_view()
    assert np.allclose(topo_in, topo_out)

    # a new design matrix survives the trip through the topological view
    design_in = rng.randn(*design_matrix.shape)
    as_topo = built_from_design.get_topological_view(design_in)
    design_out = built_from_design.get_design_matrix(as_topo)
    assert np.allclose(design_in, design_out)
Пример #8
0
def apply_ZCA_fast(patches, normalize, zca_preprocessor):
    """
    Run `zca_preprocessor` over `patches` and return the result.

    The patches are converted to float32 (a copy, so the argument is not
    modified), divided by 255 when `normalize` is true, transposed into a
    DenseDesignMatrix, preprocessed, and transposed back before returning.
    """
    as_float = patches.astype(np.float32)
    if normalize:
        as_float /= 255.0
    wrapped = DenseDesignMatrix(X=as_float.T)
    zca_preprocessor.apply(wrapped)
    return wrapped.get_design_matrix().T
Пример #9
0
        def test(store_inverse):
            """
            Preprocess a copy of self.X with ZCA and check that
            preprocessor.inverse maps the result back onto self.X.

            store_inverse is forwarded to the ZCA constructor.
            """
            preprocessed_X = copy.copy(self.X)
            preprocessor = ZCA(store_inverse=store_inverse)

            dataset = DenseDesignMatrix(X=preprocessed_X,
                                        preprocessor=preprocessor,
                                        fit_preprocessor=True)

            preprocessed_X = dataset.get_design_matrix()
            # explicit tolerances: the round trip is only exact up to
            # floating-point error, and the default (rtol=1e-7, atol=0)
            # leaves essentially no room for that
            assert_allclose(self.X,
                            preprocessor.inverse(preprocessed_X),
                            atol=5e-5,
                            rtol=1e-5)
Пример #10
0
    def test(store_inverse):
        """
        Preprocess random data with ZCA and check that
        preprocessor.inverse recovers the original values.

        store_inverse is forwarded to the ZCA constructor.
        """
        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(15, 10))
        # work on a copy so X stays available as the reference
        preprocessed_X = copy.copy(X)
        preprocessor = ZCA(store_inverse=store_inverse)

        dataset = DenseDesignMatrix(X=preprocessed_X,
                                    preprocessor=preprocessor,
                                    fit_preprocessor=True)

        preprocessed_X = dataset.get_design_matrix()

        # explicit tolerances: the round trip is only exact up to
        # floating-point error, and the default (rtol=1e-7, atol=0)
        # leaves essentially no room for that
        assert_allclose(X,
                        preprocessor.inverse(preprocessed_X),
                        atol=5e-5,
                        rtol=1e-5)
Пример #11
0
    def test(store_inverse):
        """
        Round-trip check for ZCA: preprocessing followed by
        preprocessor.inverse must give back the input data.

        store_inverse is forwarded to the ZCA constructor.
        """
        rng = np.random.RandomState([1, 2, 3])
        X = as_floatX(rng.randn(15, 10))
        # the dataset gets its own copy; X is kept as the expected value
        preprocessed_X = copy.copy(X)
        preprocessor = ZCA(store_inverse=store_inverse)

        dataset = DenseDesignMatrix(X=preprocessed_X,
                                    preprocessor=preprocessor,
                                    fit_preprocessor=True)

        preprocessed_X = dataset.get_design_matrix()

        # a floating-point round trip cannot be expected to match to the
        # default rtol=1e-7 with zero absolute tolerance, so both
        # tolerances are stated explicitly
        assert_allclose(X,
                        preprocessor.inverse(preprocessed_X),
                        atol=5e-5,
                        rtol=1e-5)
Пример #12
0
    def test_zero_image(self):
        """
        Feed all-zero images through LeCunLCN and make sure the output is
        finite, i.e. that no division by zero slipped through
        """
        pixels = as_floatX(np.zeros((5, 32 * 32 * 3)))

        axis_order = ['b', 0, 1, 'c']
        converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                             axis_order)
        dataset = DenseDesignMatrix(X=pixels, view_converter=converter)
        dataset.axes = axis_order

        dataset.apply_preprocessor(LeCunLCN(img_shape=[32, 32]))

        assert isfinite(dataset.get_design_matrix())
Пример #13
0
    def test_zero_image(self):
        """
        LeCunLCN applied to images made only of zeros must still produce
        finite values (guards against a division by zero)
        """
        axes = ['b', 0, 1, 'c']
        zero_images = as_floatX(np.zeros((5, 32 * 32 * 3)))
        view_converter = dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes)

        dataset = DenseDesignMatrix(X=zero_images,
                                    view_converter=view_converter)
        dataset.axes = axes

        lcn = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(lcn)
        normalized = dataset.get_design_matrix()

        assert isfinite(normalized)
Пример #14
0
    def test_zero_vector(self):
        """ Global contrast normalization of the zero vector must not
            produce NaN or infinite values (no divide by 0) """

        zero_input = as_floatX(np.zeros((1, 1)))
        dataset = DenseDesignMatrix(X=zero_input)

        # sqrt_bias is set to 0.0 because, per the original note on this
        # test, that is the only value for which a failure is a risk;
        # the other two settings are not relevant to the test
        gcn = GlobalContrastNormalization(subtract_mean=True,
                                          sqrt_bias=0.0,
                                          use_std=True)
        dataset.apply_preprocessor(gcn)

        output = dataset.get_design_matrix()

        assert not np.any(np.isnan(output))
        assert not np.any(np.isinf(output))
Пример #15
0
    def test_channel(self):
        """
        Check that LeCunLCN works when an explicit list of channels is
        passed as argument
        """
        random_state = np.random.RandomState([1, 2, 3])
        images = as_floatX(random_state.randn(5, 32 * 32 * 3))

        axis_order = ['b', 0, 1, 'c']
        converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                             axis_order)
        dataset = DenseDesignMatrix(X=images, view_converter=converter)
        dataset.axes = axis_order

        # only channels 1 and 2 are handed to the preprocessor
        dataset.apply_preprocessor(
            LeCunLCN(img_shape=[32, 32], channels=[1, 2]))

        assert isfinite(dataset.get_design_matrix())
Пример #16
0
    def test_channel(self):
        """
        LeCunLCN must yield finite output when it is given a subset of
        channels through its `channels` argument
        """
        axes = ['b', 0, 1, 'c']
        rng = np.random.RandomState([1, 2, 3])
        random_images = as_floatX(rng.randn(5, 32 * 32 * 3))

        view_converter = dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes)
        dataset = DenseDesignMatrix(X=random_images,
                                    view_converter=view_converter)
        dataset.axes = axes

        lcn = LeCunLCN(img_shape=[32, 32], channels=[1, 2])
        dataset.apply_preprocessor(lcn)
        normalized = dataset.get_design_matrix()

        assert isfinite(normalized)
Пример #17
0
    def test_zero_vector(self):
        """ Normalizing the zero vector must give a finite result,
            i.e. must not divide by 0 """

        zero_input = as_floatX(np.zeros((1, 1)))
        dataset = DenseDesignMatrix(X=zero_input)

        # per the original note on this test, sqrt_bias = 0.0 is the only
        # value for which a failure is a risk; subtract_mean and use_std
        # are not relevant to the test
        gcn = GlobalContrastNormalization(subtract_mean=True,
                                          sqrt_bias=0.0,
                                          use_std=True)
        dataset.apply_preprocessor(gcn)

        assert isfinite(dataset.get_design_matrix())
Пример #18
0
def test_rgb_yuv():
    """
    Run the RGB_YUV preprocessor on random images and check that it
    completes without error and that its output contains no nan or inf
    values
    """
    random_state = np.random.RandomState([1, 2, 3])
    images = as_floatX(random_state.randn(5, 32 * 32 * 3))

    axis_order = ['b', 0, 1, 'c']
    converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                         axis_order)
    dataset = DenseDesignMatrix(X=images, view_converter=converter)
    dataset.axes = axis_order

    dataset.apply_preprocessor(RGB_YUV())

    assert isfinite(dataset.get_design_matrix())
Пример #19
0
    def test_random_image(self):
        """
        Run LeCunLCN on random images and check that it completes without
        error and that its output contains no nan or inf values
        """
        random_state = np.random.RandomState([1, 2, 3])
        images = as_floatX(random_state.randn(5, 32 * 32 * 3))

        axis_order = ["b", 0, 1, "c"]
        converter = dense_design_matrix.DefaultViewConverter((32, 32, 3),
                                                             axis_order)
        dataset = DenseDesignMatrix(X=images, view_converter=converter)
        dataset.axes = axis_order

        dataset.apply_preprocessor(LeCunLCN(img_shape=[32, 32]))
        output = dataset.get_design_matrix()

        assert not np.any(np.isnan(output))
        assert not np.any(np.isinf(output))
Пример #20
0
def test_rgb_yuv():
    """
    The RGB_YUV preprocessor, applied to random images, must run without
    any error and must not produce nan or inf values
    """
    axes = ['b', 0, 1, 'c']
    rng = np.random.RandomState([1, 2, 3])
    random_images = as_floatX(rng.randn(5, 32 * 32 * 3))

    view_converter = dense_design_matrix.DefaultViewConverter(
        (32, 32, 3), axes)
    dataset = DenseDesignMatrix(X=random_images,
                                view_converter=view_converter)
    dataset.axes = axes

    converter_to_yuv = RGB_YUV()
    dataset.apply_preprocessor(converter_to_yuv)
    converted = dataset.get_design_matrix()

    assert isfinite(converted)
Пример #21
0
    def test_random_image(self):
        """
        LeCunLCN, applied to random images, must run without any error
        and must not produce nan or inf values
        """
        axes = ['b', 0, 1, 'c']
        rng = np.random.RandomState([1, 2, 3])
        random_images = as_floatX(rng.randn(5, 32 * 32 * 3))

        view_converter = dense_design_matrix.DefaultViewConverter(
            (32, 32, 3), axes)
        dataset = DenseDesignMatrix(X=random_images,
                                    view_converter=view_converter)
        dataset.axes = axes

        lcn = LeCunLCN(img_shape=[32, 32])
        dataset.apply_preprocessor(lcn)
        normalized = dataset.get_design_matrix()

        assert not np.any(np.isnan(normalized))
        assert not np.any(np.isinf(normalized))
Пример #22
0
    def __call__(self, full_X):
        """
        Compute pooled features for one input array.

        `full_X` is indexed along three axes. A leading axis of length 1 is
        added so that it forms a batch of one example, and when the last
        axis has length 1 it is repeated three times. The batch is run
        through self.preprocessor (without fitting) and self.f, the result
        is reassembled into a topological view, and that view is pooled
        region by region with self.region_features, once for every entry
        of self.pooling_region_counts.

        Progress and timing information is printed along the way.

        Returns the pooled array for the FIRST entry of
        self.pooling_region_counts only; it is allocated with shape
        (1, count, count, self.model.nhid) and dtype float32.

        NOTE(review): the view converter below is built for (32, 32, 3),
        so the input is presumably a 32x32 image - confirm with callers.
        """

        # NOTE(review): feature_type is read here but not used anywhere
        # else in this method
        feature_type = self.feature_type
        pooling_region_counts = self.pooling_region_counts
        model = self.model
        size = self.size

        # running count of NaN entries found in the features; it is only
        # accumulated below, never printed or returned
        nan = 0


        # prepend a batch axis of length 1
        full_X = full_X.reshape(1,full_X.shape[0],full_X.shape[1],full_X.shape[2])

        # a single channel on the last axis is replicated into three
        if full_X.shape[3] == 1:
            full_X = np.concatenate( (full_X, full_X, full_X), axis=3)

        print 'full_X.shape: '+str(full_X.shape)

        # always 1 because of the reshape above
        num_examples = full_X.shape[0]
        assert num_examples == 1

        pipeline = self.preprocessor


        # Pools the current `topo_feat` into a stride x stride grid of
        # regions. `topo_feat` and `ns` are read from the enclosing scope;
        # both are assigned further down, before this function is called.
        def average_pool( stride ):
            # index where region number p starts along an axis of length ns
            # NOTE(review): relies on `/` producing an integer usable as a
            # slice bound (Python 2 integer division, assuming ns and
            # stride are ints)
            def point( p ):
                return p * ns / stride

            rval = np.zeros( (topo_feat.shape[0], stride, stride, topo_feat.shape[3] ) , dtype = 'float32')

            # one self.region_features call per cell of the grid
            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:,i,j,:] = self.region_features( topo_feat[:,point(i):point(i+1), point(j):point(j+1),:] )

            return rval

        # one zero-initialised output array per pooling grid size
        outputs = [ np.zeros((num_examples,count,count,model.nhid),dtype='float32') for count in pooling_region_counts ]

        assert len(outputs) > 0

        # template dataset; a shallow copy of it is taken for every batch
        # and its design matrix is then replaced with the features
        fd = DenseDesignMatrix(X = np.zeros((1,1),dtype='float32'), view_converter = DefaultViewConverter([1, 1, model.nhid] ) )

        # side length handed to the depatchifier as orig_shape and used by
        # average_pool to locate region boundaries
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches( orig_shape  = (ns, ns), patch_shape=(1,1) )

        batch_size = 1

        # with num_examples == 1 and batch_size == 1 this loop runs once
        for i in xrange(0,num_examples-batch_size+1,batch_size):
            print i
            t1 = time.time()

            # wrap the current batch, cast to float32, in a dataset
            d = DenseDesignMatrix( topo_view =  np.cast['float32'](full_X[i:i+batch_size,:]), view_converter = DefaultViewConverter((32,32,3)))

            t2 = time.time()

            #print '\tapplying preprocessor'
            # can_fit = False: the preprocessor is applied as it is
            d.apply_preprocessor(pipeline, can_fit = False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'
            feat = self.f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # count NaN entries and replace them with 0 in place
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            for output, count in zip(outputs, pooling_region_counts):
                output[i:i+batch_size,...] = average_pool(count)

            t6 = time.time()

            # total time for the batch, followed by the time of each stage
            print (t6-t1, t2-t1, t3-t2, t4-t3, t5-t4, t6-t5)

        # only the output for the first pooling grid size is returned
        return outputs[0]
Пример #23
0
class testZCA:
    """
    Tests for the ZCA preprocessor
    """

    def setup(self):
        """
        We use a small predefined 8x5 matrix for
        which we know the ZCA transform.
        """
        self.X = np.array([[-10.0, 3.0, 19.0, 9.0, -15.0],
                           [7.0, 26.0, 26.0, 26.0, -3.0],
                           [17.0, -17.0, -37.0, -36.0, -11.0],
                           [19.0, 15.0, -2.0, 5.0, 9.0],
                           [-3.0, -8.0, -35.0, -25.0, -8.0],
                           [-18.0, 3.0, 4.0, 15.0, 14.0],
                           [5.0, -4.0, -5.0, -7.0, -11.0],
                           [23.0, 22.0, 15.0, 20.0, 12.0]])
        self.dataset = DenseDesignMatrix(X=as_floatX(self.X),
                                         y=as_floatX(np.ones((8, 1))))
        self.num_components = self.dataset.get_design_matrix().shape[1] - 1

    def get_preprocessed_data(self, preprocessor):
        """
        Build a DenseDesignMatrix from a copy of self.X, handing it
        `preprocessor` with fit_preprocessor=True, and return the
        resulting design matrix.
        """
        X = copy.copy(self.X)
        dataset = DenseDesignMatrix(X=X,
                                    preprocessor=preprocessor,
                                    fit_preprocessor=True)
        return dataset.get_design_matrix()

    def test_zca(self):
        """
        Confirm that ZCA.inv_P_ is the correct inverse of ZCA.P_.
        There's a lot else about the ZCA class that could be tested here.
        """
        preprocessor = ZCA()
        preprocessor.fit(self.X)

        identity = np.identity(self.X.shape[1], theano.config.floatX)
        # Check some basics of transformation matrix
        assert preprocessor.P_.shape == (self.X.shape[1], self.X.shape[1])
        # atol is required next to rtol: the off-diagonal entries of the
        # identity are 0, and a purely relative tolerance would demand
        # that the computed off-diagonal entries be exactly 0
        assert_allclose(np.dot(preprocessor.P_, preprocessor.inv_P_),
                        identity,
                        rtol=1e-4,
                        atol=1e-4)

        preprocessor = ZCA(filter_bias=0.0)
        preprocessed_X = self.get_preprocessed_data(preprocessor)

        # Check if preprocessed data matrix is white
        # (atol for the same reason as above)
        assert_allclose(np.cov(preprocessed_X.transpose(), bias=1),
                        identity,
                        rtol=1e-4,
                        atol=1e-4)

        # Check if we obtain correct solution
        zca_transformed_X = np.array(
            [[-1.0199, -0.1832, 1.9528, -0.9603, -0.8162],
             [0.0729, 1.4142, 0.2529, 1.1861, -1.0876],
             [0.9575, -1.1173, -0.5435, -1.4372, -0.1057],
             [0.6348, 1.1258, 0.2692, -0.8893, 1.1669],
             [-0.9769, 0.8297, -1.8676, -0.6055, -0.5096],
             [-1.5700, -0.8389, -0.0931, 0.8877, 1.6089],
             [0.4993, -1.4219, -0.3443, 0.9664, -1.1022],
             [1.4022, 0.1917, 0.3736, 0.8520, 0.8456]]
        )
        assert_allclose(preprocessed_X, zca_transformed_X, rtol=1e-3)

    def test_num_components(self):
        """
        Confirm that keeping 3 components and dropping 2 components
        (out of 5) both give the known truncated transform.
        """
        # Keep 3 components
        preprocessor = ZCA(filter_bias=0.0, n_components=3)
        preprocessed_X = self.get_preprocessed_data(preprocessor)

        zca_truncated_X = np.array(
            [[-0.8938, -0.3084, 1.1105, 0.1587, -1.4073],
             [0.3346, 0.5193, 1.1371, 0.6545, -0.4199],
             [0.7613, -0.4823, -1.0578, -1.1997, -0.4993],
             [0.9250, 0.5012, -0.2743, 0.1735, 0.8105],
             [-0.4928, -0.6319, -1.0359, -0.7173, 0.1469],
             [-1.8060, -0.1758, -0.2943, 0.7208, 1.4359],
             [0.0079, -0.2582, 0.1368, -0.3571, -0.8147],
             [1.1636, 0.8362, 0.2777, 0.5666, 0.7480]]
        )
        assert_allclose(zca_truncated_X, preprocessed_X, rtol=1e-3)

        # Drop 2 components: result should be similar
        preprocessor = ZCA(filter_bias=0.0, n_drop_components=2)
        preprocessed_X = self.get_preprocessed_data(preprocessor)
        assert_allclose(zca_truncated_X, preprocessed_X, rtol=1e-3)

    def test_zca_inverse(self):
        """
        Calculates the inverse of X with numpy.linalg.inv
        if inv_P_ is not stored.
        """
        def test(store_inverse):
            preprocessed_X = copy.copy(self.X)
            preprocessor = ZCA(store_inverse=store_inverse)

            dataset = DenseDesignMatrix(X=preprocessed_X,
                                        preprocessor=preprocessor,
                                        fit_preprocessor=True)

            preprocessed_X = dataset.get_design_matrix()
            # explicit tolerances: the round trip is only exact up to
            # floating-point error, which the default (rtol=1e-7, atol=0)
            # leaves essentially no room for
            assert_allclose(self.X,
                            preprocessor.inverse(preprocessed_X),
                            atol=5e-5,
                            rtol=1e-5)

        test(store_inverse=True)
        test(store_inverse=False)

    def test_zca_dtypes(self):
        """
        Confirm that ZCA.fit works regardless of dtype of
        data and config.floatX
        """

        orig_floatX = config.floatX

        try:
            for floatX in ['float32', 'float64']:
                # actually switch floatX; previously the loop variable
                # was never applied, so only one setting was exercised
                config.floatX = floatX
                for dtype in ['float32', 'float64']:
                    preprocessor = ZCA()
                    # cast the data so that each dtype is really covered
                    preprocessor.fit(self.X.astype(dtype))
        finally:
            # restore the global setting even if a fit fails
            config.floatX = orig_floatX
Пример #24
0
 def get_preprocessed_data(self, preprocessor):
     """
     Wrap a copy of self.X in a DenseDesignMatrix that is given
     `preprocessor` with fit_preprocessor=True, and return the design
     matrix of that dataset.
     """
     data_copy = copy.copy(self.X)
     wrapped = DenseDesignMatrix(X=data_copy,
                                 preprocessor=preprocessor,
                                 fit_preprocessor=True)
     design_matrix = wrapped.get_design_matrix()
     return design_matrix
Пример #25
0
    def __call__(self, full_X):
        """
        Compute pooled features for one input array.

        `full_X` is indexed along three axes. A leading axis of length 1 is
        added so that it forms a batch of one example, and when the last
        axis has length 1 it is repeated three times. The batch is run
        through self.preprocessor (without fitting) and self.f, the result
        is reassembled into a topological view, and that view is pooled
        region by region with self.region_features, once for every entry
        of self.pooling_region_counts.

        Progress and timing information is printed along the way.

        Returns the pooled array for the FIRST entry of
        self.pooling_region_counts only; it is allocated with shape
        (1, count, count, self.model.nhid) and dtype float32.

        NOTE(review): the view converter below is built for (32, 32, 3),
        so the input is presumably a 32x32 image - confirm with callers.
        """

        # NOTE(review): feature_type is read here but not used anywhere
        # else in this method
        feature_type = self.feature_type
        pooling_region_counts = self.pooling_region_counts
        model = self.model
        size = self.size

        # running count of NaN entries found in the features; it is only
        # accumulated below, never printed or returned
        nan = 0

        # prepend a batch axis of length 1
        full_X = full_X.reshape(1, full_X.shape[0], full_X.shape[1],
                                full_X.shape[2])

        # a single channel on the last axis is replicated into three
        if full_X.shape[3] == 1:
            full_X = np.concatenate((full_X, full_X, full_X), axis=3)

        print 'full_X.shape: ' + str(full_X.shape)

        # always 1 because of the reshape above
        num_examples = full_X.shape[0]
        assert num_examples == 1

        pipeline = self.preprocessor

        # Pools the current `topo_feat` into a stride x stride grid of
        # regions. `topo_feat` and `ns` are read from the enclosing scope;
        # both are assigned further down, before this function is called.
        def average_pool(stride):
            # index where region number p starts along an axis of length ns
            # NOTE(review): relies on `/` producing an integer usable as a
            # slice bound (Python 2 integer division, assuming ns and
            # stride are ints)
            def point(p):
                return p * ns / stride

            rval = np.zeros(
                (topo_feat.shape[0], stride, stride, topo_feat.shape[3]),
                dtype='float32')

            # one self.region_features call per cell of the grid
            for i in xrange(stride):
                for j in xrange(stride):
                    rval[:, i, j, :] = self.region_features(
                        topo_feat[:,
                                  point(i):point(i + 1),
                                  point(j):point(j + 1), :])

            return rval

        # one zero-initialised output array per pooling grid size
        outputs = [
            np.zeros((num_examples, count, count, model.nhid), dtype='float32')
            for count in pooling_region_counts
        ]

        assert len(outputs) > 0

        # template dataset; a shallow copy of it is taken for every batch
        # and its design matrix is then replaced with the features
        fd = DenseDesignMatrix(X=np.zeros((1, 1), dtype='float32'),
                               view_converter=DefaultViewConverter(
                                   [1, 1, model.nhid]))

        # side length handed to the depatchifier as orig_shape and used by
        # average_pool to locate region boundaries
        ns = 32 - size + 1
        depatchifier = ReassembleGridPatches(orig_shape=(ns, ns),
                                             patch_shape=(1, 1))

        batch_size = 1

        # with num_examples == 1 and batch_size == 1 this loop runs once
        for i in xrange(0, num_examples - batch_size + 1, batch_size):
            print i
            t1 = time.time()

            # wrap the current batch, cast to float32, in a dataset
            d = DenseDesignMatrix(
                topo_view=np.cast['float32'](full_X[i:i + batch_size, :]),
                view_converter=DefaultViewConverter((32, 32, 3)))

            t2 = time.time()

            #print '\tapplying preprocessor'
            # can_fit=False: the preprocessor is applied as it is
            d.apply_preprocessor(pipeline, can_fit=False)
            X2 = d.get_design_matrix()

            t3 = time.time()

            #print '\trunning theano function'
            feat = self.f(X2)

            t4 = time.time()

            assert feat.dtype == 'float32'

            feat_dataset = copy.copy(fd)

            # count NaN entries and replace them with 0 in place
            if np.any(np.isnan(feat)):
                nan += np.isnan(feat).sum()
                feat[np.isnan(feat)] = 0

            feat_dataset.set_design_matrix(feat)

            #print '\treassembling features'
            feat_dataset.apply_preprocessor(depatchifier)

            #print '\tmaking topological view'
            topo_feat = feat_dataset.get_topological_view()
            assert topo_feat.shape[0] == batch_size

            t5 = time.time()

            #average pooling
            for output, count in zip(outputs, pooling_region_counts):
                output[i:i + batch_size, ...] = average_pool(count)

            t6 = time.time()

            # total time for the batch, followed by the time of each stage
            print(t6 - t1, t2 - t1, t3 - t2, t4 - t3, t5 - t4, t6 - t5)

        # only the output for the first pooling grid size is returned
        return outputs[0]
Пример #26
0
 def get_preprocessed_data(self, preprocessor):
     """
     Return the design matrix obtained by constructing a
     DenseDesignMatrix from a copy of self.X together with
     `preprocessor` and fit_preprocessor=True.
     """
     source = copy.copy(self.X)
     options = dict(X=source,
                    preprocessor=preprocessor,
                    fit_preprocessor=True)
     return DenseDesignMatrix(**options).get_design_matrix()
Пример #27
0
class testZCA:
    """
    Tests for the ZCA preprocessor
    """
    def setup(self):
        """
        We use a small predefined 8x5 matrix for
        which we know the ZCA transform.
        """
        self.X = np.array([[-10.0, 3.0, 19.0, 9.0, -15.0],
                           [7.0, 26.0, 26.0, 26.0, -3.0],
                           [17.0, -17.0, -37.0, -36.0, -11.0],
                           [19.0, 15.0, -2.0, 5.0, 9.0],
                           [-3.0, -8.0, -35.0, -25.0, -8.0],
                           [-18.0, 3.0, 4.0, 15.0, 14.0],
                           [5.0, -4.0, -5.0, -7.0, -11.0],
                           [23.0, 22.0, 15.0, 20.0, 12.0]])
        self.dataset = DenseDesignMatrix(X=as_floatX(self.X),
                                         y=as_floatX(np.ones((8, 1))))
        self.num_components = self.dataset.get_design_matrix().shape[1] - 1

    def get_preprocessed_data(self, preprocessor):
        """
        Build a DenseDesignMatrix from a copy of self.X, handing it
        `preprocessor` with fit_preprocessor=True, and return the
        resulting design matrix.
        """
        X = copy.copy(self.X)
        dataset = DenseDesignMatrix(X=X,
                                    preprocessor=preprocessor,
                                    fit_preprocessor=True)
        return dataset.get_design_matrix()

    def test_zca(self):
        """
        Confirm that ZCA.inv_P_ is the correct inverse of ZCA.P_.
        There's a lot else about the ZCA class that could be tested here.
        """
        preprocessor = ZCA()
        preprocessor.fit(self.X)

        identity = np.identity(self.X.shape[1], theano.config.floatX)
        # Check some basics of transformation matrix
        assert preprocessor.P_.shape == (self.X.shape[1], self.X.shape[1])
        # atol is required next to rtol: the off-diagonal entries of the
        # identity are 0, and a purely relative tolerance would demand
        # that the computed off-diagonal entries be exactly 0 (the
        # whiteness check below already passes atol for this reason)
        assert_allclose(np.dot(preprocessor.P_, preprocessor.inv_P_),
                        identity,
                        rtol=1e-4,
                        atol=1e-4)

        preprocessor = ZCA(filter_bias=0.0)
        preprocessed_X = self.get_preprocessed_data(preprocessor)

        # Check if preprocessed data matrix is white
        assert_allclose(np.cov(preprocessed_X.transpose(), bias=1),
                        identity,
                        rtol=1e-4,
                        atol=1e-4)

        # Check if we obtain correct solution
        zca_transformed_X = np.array(
            [[-1.0199, -0.1832, 1.9528, -0.9603, -0.8162],
             [0.0729, 1.4142, 0.2529, 1.1861, -1.0876],
             [0.9575, -1.1173, -0.5435, -1.4372, -0.1057],
             [0.6348, 1.1258, 0.2692, -0.8893, 1.1669],
             [-0.9769, 0.8297, -1.8676, -0.6055, -0.5096],
             [-1.5700, -0.8389, -0.0931, 0.8877, 1.6089],
             [0.4993, -1.4219, -0.3443, 0.9664, -1.1022],
             [1.4022, 0.1917, 0.3736, 0.8520, 0.8456]])
        assert_allclose(preprocessed_X, zca_transformed_X, rtol=1e-3)

    def test_num_components(self):
        """
        Confirm that keeping 3 components and dropping 2 components
        (out of 5) both give the known truncated transform.
        """
        # Keep 3 components
        preprocessor = ZCA(filter_bias=0.0, n_components=3)
        preprocessed_X = self.get_preprocessed_data(preprocessor)

        zca_truncated_X = np.array(
            [[-0.8938, -0.3084, 1.1105, 0.1587, -1.4073],
             [0.3346, 0.5193, 1.1371, 0.6545, -0.4199],
             [0.7613, -0.4823, -1.0578, -1.1997, -0.4993],
             [0.9250, 0.5012, -0.2743, 0.1735, 0.8105],
             [-0.4928, -0.6319, -1.0359, -0.7173, 0.1469],
             [-1.8060, -0.1758, -0.2943, 0.7208, 1.4359],
             [0.0079, -0.2582, 0.1368, -0.3571, -0.8147],
             [1.1636, 0.8362, 0.2777, 0.5666, 0.7480]])
        assert_allclose(zca_truncated_X, preprocessed_X, rtol=1e-3)

        # Drop 2 components: result should be similar
        preprocessor = ZCA(filter_bias=0.0, n_drop_components=2)
        preprocessed_X = self.get_preprocessed_data(preprocessor)
        assert_allclose(zca_truncated_X, preprocessed_X, rtol=1e-3)

    def test_zca_inverse(self):
        """
        Calculates the inverse of X with numpy.linalg.inv
        if inv_P_ is not stored.
        """
        def test(store_inverse):
            preprocessed_X = copy.copy(self.X)
            preprocessor = ZCA(store_inverse=store_inverse)

            dataset = DenseDesignMatrix(X=preprocessed_X,
                                        preprocessor=preprocessor,
                                        fit_preprocessor=True)

            preprocessed_X = dataset.get_design_matrix()
            # the round trip is only exact up to floating-point error,
            # hence the explicit absolute and relative tolerances
            assert_allclose(self.X,
                            preprocessor.inverse(preprocessed_X),
                            atol=5e-5,
                            rtol=1e-5)

        test(store_inverse=True)
        test(store_inverse=False)

    def test_zca_dtypes(self):
        """
        Confirm that ZCA.fit works regardless of dtype of
        data and config.floatX
        """

        orig_floatX = config.floatX

        try:
            for floatX in ['float32', 'float64']:
                # actually switch floatX; previously the loop variable
                # was never applied, so only one setting was exercised
                config.floatX = floatX
                for dtype in ['float32', 'float64']:
                    preprocessor = ZCA()
                    # cast the data so that each dtype is really covered
                    preprocessor.fit(self.X.astype(dtype))
        finally:
            # restore the global setting even if a fit fails
            config.floatX = orig_floatX