Example #1
def testSFANode_rank_deficit():
    def test_for_data(dat,
                      dat0,
                      dfc,
                      out,
                      eq_pr_dict=None,
                      rk_thr_dict=None,
                      check_data=None,
                      check_dfc=None):
        if eq_pr_dict is None:
            eq_pr_dict = {'reg': 5, 'pca': 5, 'svd': 5, 'ldl': 5}
        if check_data is None:
            check_data = {'reg': True, 'pca': True, 'svd': True, 'ldl': True}
        if check_dfc is None:
            check_dfc = {'reg': True, 'pca': True, 'svd': True, 'ldl': True}
        if rk_thr_dict is None:
            rk_thr_dict = {'reg': 1e-10, 'pca': 1e-10,
                           'svd': 1e-10, 'ldl': 1e-10}
        sfa0 = mdp.nodes.SFANode(output_dim=out)
        sfa0.train(dat0)
        sfa0.stop_training()
        sdat0 = sfa0.execute(dat0)

        sfa2_reg = mdp.nodes.SFANode(output_dim=out, rank_deficit_method='reg')
        sfa2_reg.rank_threshold = rk_thr_dict['reg']
        # This is equivalent to:
        # sfa2_reg._sfa_solver = sfa2_reg._rank_deficit_solver_reg
        sfa2_reg.train(dat)
        sfa2_reg.stop_training()
        sdat_reg = sfa2_reg.execute(dat)

        sfa2_pca = mdp.nodes.SFANode(output_dim=out)
        # For this test we add the rank_deficit_solver later, so we can
        # assert that ordinary SFA would actually fail on the data.
        sfa2_pca.train(dat)
        try:
            sfa2_pca.stop_training()
            # Assert that with dfc > 0 ordinary SFA wouldn't reach this line.
            assert dfc == 0
        except mdp.NodeException:
            sfa2_pca.set_rank_deficit_method('pca')
            sfa2_pca.rank_threshold = rk_thr_dict['pca']
            sfa2_pca.stop_training()
        sdat_pca = sfa2_pca.execute(dat)

        sfa2_svd = mdp.nodes.SFANode(output_dim=out, rank_deficit_method='svd')
        sfa2_svd.rank_threshold = rk_thr_dict['svd']
        sfa2_svd.train(dat)
        sfa2_svd.stop_training()
        sdat_svd = sfa2_svd.execute(dat)

        def matrix_cmp(A, B):
            assert_array_almost_equal(abs(A), abs(B))
            return True

        if check_data['reg']:
            assert_array_almost_equal(abs(sdat_reg), abs(sdat0),
                                      eq_pr_dict['reg'])
        if check_data['pca']:
            assert_array_almost_equal(abs(sdat_pca), abs(sdat0),
                                      eq_pr_dict['pca'])
        if check_data['svd']:
            assert_array_almost_equal(abs(sdat_svd), abs(sdat0),
                                      eq_pr_dict['svd'])

        reg_dfc = sfa2_reg.rank_deficit == dfc
        pca_dfc = sfa2_pca.rank_deficit == dfc
        svd_dfc = sfa2_svd.rank_deficit == dfc

        if reg_dfc:
            assert_array_almost_equal(sfa2_reg.d, sfa0.d, eq_pr_dict['reg'])
        if pca_dfc:
            assert_array_almost_equal(sfa2_pca.d, sfa0.d, eq_pr_dict['pca'])
        if svd_dfc:
            assert_array_almost_equal(sfa2_svd.d, sfa0.d, eq_pr_dict['svd'])

        # check that constraints are met
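        # (the outputs must have unit covariance, and the covariance of the
        # temporal differences must be diag(d), matching ordinary SFA)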
        idn = numx.identity(out)
        d_diag = numx.diag(sfa0.d)
        # reg ok?
        assert_array_almost_equal(
            mult(sdat_reg.T, sdat_reg) / (len(sdat_reg) - 1), idn,
            eq_pr_dict['reg'])
        sdat_reg_d = sdat_reg[1:] - sdat_reg[:-1]
        assert_array_almost_equal(
            mult(sdat_reg_d.T, sdat_reg_d) / (len(sdat_reg_d) - 1), d_diag,
            eq_pr_dict['reg'])
        # pca ok?
        assert_array_almost_equal(
            mult(sdat_pca.T, sdat_pca) / (len(sdat_pca) - 1), idn,
            eq_pr_dict['pca'])
        sdat_pca_d = sdat_pca[1:] - sdat_pca[:-1]
        assert_array_almost_equal(
            mult(sdat_pca_d.T, sdat_pca_d) / (len(sdat_pca_d) - 1), d_diag,
            eq_pr_dict['pca'])
        # svd ok?
        assert_array_almost_equal(
            mult(sdat_svd.T, sdat_svd) / (len(sdat_svd) - 1), idn,
            eq_pr_dict['svd'])
        sdat_svd_d = sdat_svd[1:] - sdat_svd[:-1]
        assert_array_almost_equal(
            mult(sdat_svd_d.T, sdat_svd_d) / (len(sdat_svd_d) - 1), d_diag,
            eq_pr_dict['svd'])

        try:
            # test ldl separately due to its requirement of SciPy >= 1.0
            sfa2_ldl = mdp.nodes.SFANode(output_dim=out,
                                         rank_deficit_method='ldl')
            sfa2_ldl.rank_threshold = rk_thr_dict['ldl']
            have_ldl = True
        except mdp.NodeException:
            # No SciPy >= 1.0 available.
            have_ldl = False
        if have_ldl:
            sfa2_ldl.train(dat)
            sfa2_ldl.stop_training()
            sdat_ldl = sfa2_ldl.execute(dat)

            if check_data['ldl']:
                assert_array_almost_equal(abs(sdat_ldl), abs(sdat0),
                                          eq_pr_dict['ldl'])
            ldl_dfc = sfa2_ldl.rank_deficit == dfc
            if ldl_dfc:
                assert_array_almost_equal(sfa2_ldl.d, sfa0.d,
                                          eq_pr_dict['ldl'])

            # check that constraints are met
            # ldl ok?
            assert_array_almost_equal(
                mult(sdat_ldl.T, sdat_ldl) / (len(sdat_ldl) - 1), idn,
                eq_pr_dict['ldl'])
            sdat_ldl_d = sdat_ldl[1:] - sdat_ldl[:-1]
            assert_array_almost_equal(
                mult(sdat_ldl_d.T, sdat_ldl_d) / (len(sdat_ldl_d) - 1), d_diag,
                eq_pr_dict['ldl'])
        else:
            ldl_dfc = None
        ldl_dfc2 = ldl_dfc is True or (ldl_dfc is None and not have_ldl)
        assert all((reg_dfc or not check_dfc['reg'],
                pca_dfc or not check_dfc['pca'],
                svd_dfc or not check_dfc['svd'],
                ldl_dfc2 or not check_dfc['ldl'])), \
                "Rank deficit ok? reg: %s, pca: %s, svd: %s, ldl: %s" % \
                (reg_dfc, pca_dfc, svd_dfc, ldl_dfc)
        return sfa2_pca.d

    # ============test with random data:
    dfc_max = 200
    dat_dim = 500
    dat_smpl = 10000
    dat = numx.random.rand(dat_smpl, dat_dim)  # test data
    dfc = numx.random.randint(0, dfc_max)  # rank deficit
    out = numx.random.randint(4, dat_dim - 50 - dfc)  # output dim

    # We add some linear redundancy to the data...
    if dfc > 0:
        # dfc is how many dimensions we overwrite with duplicates
        # This should yield an overall rank deficit of dfc
        dat0 = dat[:, :-dfc]  # for use by ordinary SFA
        dat[:, -dfc:] = dat[:, :dfc]
    else:
        dat0 = dat

    test_for_data(dat, dat0, dfc, out)

    # We mix the redundancy a bit more with the other data and test again...
    if dfc > 0:
        # This should yield an overall rank deficit of dfc
        ovl = numx.random.randint(0, dat_dim - max(out, dfc_max))
        # We generate a random, yet orthogonal matrix M for mixing:
        M = numx.random.rand(dfc + ovl, dfc + ovl)
        _, M = symeig(M + M.T)
        dat[:, -(dfc + ovl):] = dat[:, -(dfc + ovl):].dot(M)

        # We test again with mixing matrix applied
        test_for_data(dat, dat0, dfc, out)

    # ============test with nasty data:

    # Create another set of data...
    dat = numx.random.rand(dat_smpl, dat_dim)  # test data
    dfc = numx.random.randint(0, dfc_max)  # rank deficit
    out = numx.random.randint(4, dat_dim - 50 - dfc)  # output dim

    # We add some linear redundancy to the data...
    if dfc > 0:
        # dfc is how many dimensions we overwrite with duplicates
        # This should yield an overall rank deficit of dfc
        dat0 = dat[:, :-dfc]  # for use by ordinary SFA
        dat[:, -dfc:] = dat[:, :dfc]
    else:
        dat0 = dat

    # And additionally add a very slow actual feature...
    dat[:, dfc] = numx.arange(dat_smpl)

    # We mute some checks here because they sometimes fail
    check_data = {'reg': False, 'pca': False, 'svd': False, 'ldl': False}
    check_dfc = {'reg': False, 'pca': False, 'svd': False, 'ldl': False}
    # Note: In most cases accuracy is much higher than checked here.
    eq_pr_dict = {'reg': 2, 'pca': 2, 'svd': 2, 'ldl': 2}
    rk_thr_dict = {'reg': 1e-8, 'pca': 1e-7, 'svd': 1e-7, 'ldl': 1e-6}
    # Here we assert the very slow but actual feature is not filtered out:
    assert test_for_data(dat, dat0, dfc, out, eq_pr_dict, rk_thr_dict,
                         check_data, check_dfc)[0] < 1e-5

    # We mix the redundancy a bit more with the other data and test again...
    if dfc > 0:
        # This should yield an overal rank deficit of dfc
        ovl = numx.random.randint(0, dat_dim - max(out, dfc_max))
        # We generate a random, yet orthogonal matrix M for mixing:
        M = numx.random.rand(dfc + ovl, dfc + ovl)
        _, M = symeig(M + M.T)
        dat[:, -(dfc + ovl):] = dat[:, -(dfc + ovl):].dot(M)

        # We test again with mixing matrix applied
        # Again we assert the very slow but actual feature is not filtered out:
        assert test_for_data(dat, dat0, dfc, out, eq_pr_dict, rk_thr_dict,
                             check_data, check_dfc)[0] < 1e-5
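
A minimal usage sketch of the API exercised by this test, assuming mdp and
NumPy are importable (the duplicated column and the expected deficit of 1
are illustrative):

import numpy as np
import mdp

x = np.random.rand(1000, 5)
x[:, -1] = x[:, 0]  # duplicate a column -> covariance matrix loses one rank

sfa = mdp.nodes.SFANode(output_dim=3, rank_deficit_method='pca')
sfa.train(x)
sfa.stop_training()  # would raise for plain SFA; the pca solver handles it
y = sfa.execute(x)
print(sfa.rank_deficit)  # expected: 1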
Example #2
    def _stop_training(self):
        Cumulator._stop_training(self)

        if self.verbose:
            msg = ('training LLE on %i points'
                   ' in %i dimensions...' %
                   (self.data.shape[0], self.data.shape[1]))
            print(msg)

        # some useful quantities
        M = self.data
        N = M.shape[0]
        k = self.k
        r = self.r

        # indices of diagonal elements
        W_diag_idx = numx.arange(N)
        Q_diag_idx = numx.arange(k)

        if k > N:
            err = ('k=%i must be less than or '
                   'equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # do we need to automatically determine the regularization term?
        auto_reg = r is None

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        # build the weight matrix
        #XXX future work:
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, N), dtype=self.dtype)

        if self.verbose:
            print(' - constructing [%i x %i] weight matrix...' % W.shape)

        for row in range(N):
            if learn_outdim:
                Q = Qs[row, :, :]
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M - M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]
                M_Mi = M_Mi[nbrs]
                # compute covariance matrix of distances
                Q = mult(M_Mi, M_Mi.T)

            # -----------------------------------------------
            #  compute weight vector based on neighbors
            # -----------------------------------------------

            #Covariance matrix may be nearly singular:
            # add a diagonal correction to prevent numerical errors
            if auto_reg:
                # automatic mode: correction is equal to the sum of
                # the (d_in-d_out) unused variances (as in deRidder &
                # Duin)
                if learn_outdim:
                    sig2 = sig2s[row, :]
                else:
                    sig2 = svd(M_Mi, compute_uv=0)**2
                r = numx.sum(sig2[self.output_dim:])
                Q[Q_diag_idx, Q_diag_idx] += r
            else:
                # Roweis et al instead use "a correction that
                #   is small compared to the trace" e.g.:
                # r = 0.001 * float(Q.trace())
                # this is equivalent to assuming 0.1% of the variance is unused
                Q[Q_diag_idx, Q_diag_idx] += r * Q.trace()

            #solve for weight
            # weight is w such that sum(Q_ij * w_j) = 1 for all i
            # XXX refcast is due to numpy bug: floats become double
            w = self._refcast(numx_linalg.solve(Q, numx.ones(k)))
            w /= w.sum()

            #update row of the weight matrix
            W[nbrs, row] = w

        if self.verbose:
            msg = (' - finding [%i x %i] null space of weight matrix\n'
                   '     (may take a while)...' % (self.output_dim, N))
            print(msg)

        self.W = W.copy()
        #to find the null space, we need the bottom d+1
        #  eigenvectors of (W-I).T*(W-I)
        #Compute this using the svd of (W-I):
        W[W_diag_idx, W_diag_idx] -= 1.

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step
        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, self.output_dim + 1))
        else:
            # the following code does the same computation, but uses
            # symeig, which computes only the required eigenvectors, and
            # is much faster. However, it could also be more unstable...
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            WW[W_diag_idx, W_diag_idx] += 0.1
            sig, U = symeig(WW, range=(2, self.output_dim + 1), overwrite=True)

        self.training_projection = U
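
The core of the loop above, isolated: for one point, find its k nearest
neighbors, build the local Gram matrix of centered neighbor offsets,
regularize its diagonal, and solve for reconstruction weights that sum to
one. A standalone NumPy sketch (lle_weights and the fixed reg value are
illustrative, not part of the node's API):

import numpy as np

def lle_weights(M, row, k, reg=0.001):
    d2 = ((M - M[row]) ** 2).sum(1)    # squared distances to all points
    nbrs = np.argsort(d2)[1:k + 1]     # k nearest neighbors, self excluded
    Z = M[nbrs] - M[row]               # offsets of the neighbors
    Q = Z.dot(Z.T)                     # local Gram matrix, may be singular
    Q[np.diag_indices(k)] += reg * np.trace(Q)  # Roweis-style correction
    w = np.linalg.solve(Q, np.ones(k))
    return nbrs, w / w.sum()           # weights normalized to sum to one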
Example #3
    def _stop_training(self):
        Cumulator._stop_training(self)

        k = self.k
        M = self.data
        N = M.shape[0]

        if k > N:
            err = ('k=%i must be less than'
                   ' or equal to number of training points N=%i' % (k, N))
            raise TrainingException(err)

        if self.verbose:
            print('performing HLLE on %i points in %i dimensions...' % M.shape)

        # determines number of output dimensions: if desired_variance
        # is specified, we need to learn it from the data. Otherwise,
        # it's easy
        learn_outdim = False
        if self.output_dim is None:
            if self.desired_variance is None:
                self.output_dim = self.input_dim
            else:
                learn_outdim = True

        # determine number of output dims, precalculate useful stuff
        if learn_outdim:
            Qs, sig2s, nbrss = self._adjust_output_dim()

        d_out = self.output_dim

        #dp = d_out + (d_out-1) + (d_out-2) + ...
        dp = d_out * (d_out + 1) // 2

        if min(k, N) <= d_out:
            err = ('k=%i and n=%i (number of input data points) must be'
                   ' larger than output_dim=%i' % (k, N, d_out))
            raise TrainingException(err)

        if k < 1 + d_out + dp:
            wrn = ('The number of neighbours, k=%i, is smaller than'
                   ' 1 + output_dim + output_dim*(output_dim+1)/2 = %i,'
                   ' which might result in unstable results.' %
                   (k, 1 + d_out + dp))
            _warnings.warn(wrn, MDPWarning)

        #build the weight matrix
        #XXX   for faster implementation, W should be a sparse matrix
        W = numx.zeros((N, dp * N), dtype=self.dtype)

        if self.verbose:
            print(' - constructing [%i x %i] weight matrix...' % W.shape)

        for row in range(N):
            if learn_outdim:
                nbrs = nbrss[row, :]
            else:
                # -----------------------------------------------
                #  find k nearest neighbors
                # -----------------------------------------------
                M_Mi = M - M[row]
                nbrs = numx.argsort((M_Mi**2).sum(1))[1:k + 1]

            #-----------------------------------------------
            #  center the neighborhood using the mean
            #-----------------------------------------------
            nbrhd = M[nbrs]  # this makes a copy
            nbrhd -= nbrhd.mean(0)

            #-----------------------------------------------
            #  compute local coordinates
            #   using a singular value decomposition
            #-----------------------------------------------
            U, sig, VT = svd(nbrhd)
            nbrhd = U.T[:d_out]
            del VT

            #-----------------------------------------------
            #  build Hessian estimator
            #-----------------------------------------------
            Yi = numx.zeros((dp, k), dtype=self.dtype)
            ct = 0
            for i in range(d_out):
                Yi[ct:ct + d_out - i, :] = nbrhd[i] * nbrhd[i:, :]
                ct += d_out - i
            Yi = numx.concatenate(
                [numx.ones((1, k), dtype=self.dtype), nbrhd, Yi], 0)
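            # Yi now stacks a constant row, the d_out local coordinates,
            # and their dp pairwise products: the design matrix for the
            # local quadratic (Hessian) fit.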

            #-----------------------------------------------
            #  orthogonalize linear and quadratic forms
            #   with QR factorization
            #  and make the weights sum to 1
            #-----------------------------------------------
            if k >= 1 + d_out + dp:
                Q, R = numx_linalg.qr(Yi.T)
                w = Q[:, d_out + 1:d_out + 1 + dp]
            else:
                q, r = _mgs(Yi.T)
                w = q[:, -dp:]

            S = w.sum(0)  #sum along columns
            #if S[i] is too small, set it equal to 1.0
            # this prevents weights from blowing up
            S[numx.where(numx.absolute(S) < 1E-4)] = 1.0
            #print w.shape, S.shape, (w/S).shape
            #print W[nbrs, row*dp:(row+1)*dp].shape
            W[nbrs, row * dp:(row + 1) * dp] = w / S

        #-----------------------------------------------
        # To find the null space, we want the
        #  first d+1 eigenvectors of W.T*W
        # Compute this using an svd of W
        #-----------------------------------------------

        if self.verbose:
            msg = (' - finding [%i x %i] '
                   'null space of weight matrix...' % (d_out, N))
            print(msg)

        #XXX future work:
        #XXX  use of upcoming ARPACK interface for bottom few eigenvectors
        #XXX   of a sparse matrix will significantly increase the speed
        #XXX   of the next step

        if self.svd:
            sig, U = nongeneral_svd(W.T, range=(2, d_out + 1))
            Y = U * numx.sqrt(N)
        else:
            WW = mult(W, W.T)
            # regularizes the eigenvalues, does not change the eigenvectors:
            W_diag_idx = numx.arange(N)
            WW[W_diag_idx, W_diag_idx] += 0.01
            sig, U = symeig(WW, range=(2, self.output_dim + 1), overwrite=True)
            Y = U * numx.sqrt(N)
            del WW
        del W

        #-----------------------------------------------
        # Normalize Y
        #
        # Alternative way to do it:
        #  we need R = (Y.T*Y)^(-1/2)
        #   do this with an SVD of Y:
        #
        #      Y = U*sig*V.T
        #      Y.T*Y = (V*sig.T*U.T) * (U*sig*V.T)
        #            = V * (sig*sig.T) * V.T
        #            = V * sig^2 V.T
        #   so
        #      R = V * sig^-1 * V.T
        # The code is:
        #    U, sig, VT = svd(Y)
        #    del U
        #    S = numx.diag(sig**-1)
        #    self.training_projection = mult(Y, mult(VT.T, mult(S, VT)))
        #-----------------------------------------------
        if self.verbose:
            print(' - normalizing null space...')

        C = sqrtm(mult(Y.T, Y))
        self.training_projection = mult(Y, C)
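
The commented alternative above derives R = (Y.T*Y)^(-1/2) from an SVD of Y.
A standalone NumPy sketch of that derivation (normalize_null_space is an
illustrative name, not part of the node):

import numpy as np

def normalize_null_space(Y):
    # Y = U * diag(sig) * V.T  =>  Y.T*Y = V * diag(sig**2) * V.T,
    # hence (Y.T*Y)^(-1/2) = V * diag(1/sig) * V.T.
    _, sig, VT = np.linalg.svd(Y, full_matrices=False)
    R = VT.T.dot(np.diag(1.0 / sig)).dot(VT)
    return Y.dot(R)  # the result Z satisfies Z.T*Z = identity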