Example #1
    def add_batch(self, X, T, wc=None):
        """Add a batch of training data to an iterative solution, weighted if neeed.

        The batch is processed as a whole; the training data is split up in the `ELM.add_data()` method.
        If the optional matrices HH_out and HT_out are given, the result is accumulated into them instead of the model.

        Args:
            X (matrix): input data matrix size (N * `inputs`)
            T (matrix): output data matrix size (N * `outputs`)
            wc (vector): vector of weights for data samples, one weight per sample, size (N * 1)
            HH_out, HT_out (matrix, optional): output matrices to add the batch result into; always given together
        """
        devH = self._project(X, dev=True)
        T = np.array(T, order="C", dtype=self.precision)
        devT = gpuarray.to_gpu(T)
        if wc is not None:  # apply weights if given
            w = np.array(
                wc**0.5,
                dtype=self.precision)[:, None]  # re-shape to column matrix
            devWC = gpuarray.to_gpu(w)
            misc.mult_matvec(devH, devWC, axis=0, out=devH)
            misc.mult_matvec(devT, devWC, axis=0, out=devT)

        if self.HH is None:  # initialize space for self.HH, self.HT
            self.HT = misc.zeros((self.L, self.outputs), dtype=self.precision)
            self.HH = linalg.eye(self.L, dtype=self.precision)
            self.HH *= self.norm

        linalg.add_dot(devH, devT, self.HT, transa='T')  # HT += H.T @ T
        if self.precision is np.float64:
            linalg.add_dot(devH, devH, self.HH, transa='T')  # HH += H.T @ H
        else:
            # single precision: symmetric rank-k update HH += H.T @ H via cuBLAS SYRK
            cublas.cublasSsyrk(self.handle, 'L', 'N', self.L, X.shape[0], 1,
                               devH.ptr, self.L, 1, self.HH.ptr, self.L)
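
The method above only ever stores the L x L correlation matrix HH = H^T H (with the regularizer `norm` on its diagonal) and the L x outputs matrix HT = H^T T, so arbitrarily large training sets can be streamed through in batches. Below is a minimal CPU-only sketch of the same accumulation pattern in plain NumPy; the class name, the random `_project` stand-in for the hidden layer, and the `solve` helper are illustrative assumptions, not part of the library above.

import numpy as np

class BatchRidgeAccumulator:
    """Hypothetical CPU sketch of the HH/HT accumulation used in add_batch above."""

    def __init__(self, inputs, L, outputs, norm=1e-6, precision=np.float64):
        self.L, self.outputs = L, outputs
        self.norm, self.precision = norm, precision
        rng = np.random.default_rng(0)
        self.W = rng.standard_normal((inputs, L))   # fixed random projection (stand-in)
        self.HH = None
        self.HT = None

    def _project(self, X):
        # stand-in for the hidden-layer projection of the real model
        return np.tanh(np.asarray(X) @ self.W).astype(self.precision)

    def add_batch(self, X, T, wc=None):
        H = self._project(X)
        T = np.asarray(T, dtype=self.precision)
        if wc is not None:                           # apply per-sample weights
            w = np.asarray(wc, dtype=self.precision)[:, None] ** 0.5
            H, T = H * w, T * w
        if self.HH is None:                          # lazy init, ridge term on the diagonal
            self.HH = self.norm * np.eye(self.L, dtype=self.precision)
            self.HT = np.zeros((self.L, self.outputs), dtype=self.precision)
        self.HH += H.T @ H                           # rank-k update, as in the GPU code
        self.HT += H.T @ T

    def solve(self):
        # output weights B with H @ B ~= T in the regularized least-squares sense
        return np.linalg.solve(self.HH, self.HT)

Feeding the data in several batches and then calling solve() gives the same output weights as processing everything at once, which is the point of keeping only HH and HT.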
Example #2
 def compute_analysis_cuda2(self,
                            xb,
                            y,
                            R,
                            P,
                            H,
                            HT=None,
                            hph=None,
                            calcP=True):
     if HT is None:
         HT = culinalg.transpose(H)
     HP = culinalg.dot(H, P)
     if hph is None:
         hph = culinalg.dot(HP, HT)          # H P H^T
     Rhph = misc.add(R, hph)                 # innovation covariance R + H P H^T
     inv = culinalg.inv(Rhph)
     W = culinalg.dot(HP, inv, transa='T')   # Kalman gain P H^T (R + H P H^T)^-1
     Hxb = culinalg.dot(H, xb)
     yHxb = misc.subtract(y, Hxb)            # innovation y - H xb
     WyHxb = culinalg.dot(W, yHxb)
     xhat = misc.add(xb, WyHxb)              # analysis state
     #xhat = xb + culinalg.dot(W, (y - culinalg.dot(H, xb)))
     if calcP:
         I = culinalg.eye(P.shape[0])
         WH = culinalg.dot(W, H)
         IWH = I - WH
         Phat = culinalg.dot(IWH, P)         # analysis covariance (I - W H) P
     else:
         Phat = misc.zeros((1, ), dtype=P.dtype)
     return xhat, Phat
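
The routine above is the standard Kalman analysis (update) step computed with scikit-cuda: gain W = P H^T (R + H P H^T)^-1, analysis state xhat = xb + W (y - H xb), and analysis covariance Phat = (I - W H) P. A small NumPy reference of the same formulas can be used to sanity-check the GPU result on toy sizes; the function name and the shapes in the usage lines are assumptions for illustration.

import numpy as np

def compute_analysis_numpy(xb, y, R, P, H, calcP=True):
    """CPU reference for the GPU update above; returns (xhat, Phat)."""
    HP = H @ P
    hph = HP @ H.T                                  # H P H^T
    W = np.linalg.solve(R + hph, HP).T              # Kalman gain P H^T (R + H P H^T)^-1
    xhat = xb + W @ (y - H @ xb)                    # analysis state
    if calcP:
        Phat = (np.eye(P.shape[0]) - W @ H) @ P     # analysis covariance
    else:
        Phat = np.zeros((1,), dtype=P.dtype)
    return xhat, Phat

# toy check with assumed sizes: 4 state variables, 2 observations
rng = np.random.default_rng(0)
H = rng.standard_normal((2, 4))
xb = rng.standard_normal((4, 1))
y = rng.standard_normal((2, 1))
xhat, Phat = compute_analysis_numpy(xb, y, R=0.1 * np.eye(2), P=np.eye(4), H=H)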
Example #3
 def test_eye_large_float32(self):
     N = 128
     e_gpu = linalg.eye(N, dtype=np.float32)
     assert np.all(np.eye(N, dtype=np.float32) == e_gpu.get())
Example #4
 def test_eye_complex128(self):
     N = 10
     e_gpu = linalg.eye(N, dtype=np.complex128)
     assert np.all(np.eye(N, dtype=np.complex128) == e_gpu.get())
Example #5
 def test_eye_float64(self):
     N = 10
     e_gpu = linalg.eye(N, dtype=np.float64)
     assert np.all(np.eye(N, dtype=np.float64) == e_gpu.get())
Example #6
    def almLasso_mat_fun(self):
        '''
        This function implements the Augmented Lagrangian Multipliers (ADMM) method for the Lasso problem.
        The Lagrangian form of the Lasso can be expressed as follows:

        MIN{ 1/2 ||Y - X BHETA||_2^2 + lambda ||THETA||_1 }  s.t.  BHETA - THETA = 0

        When applied to this problem, the ADMM updates take the form

        BHETA^(t+1) = (X^T X + rho I)^-1 (X^T Y + rho THETA^t - mu^t)
        THETA^(t+1) = Shrinkage_(lambda/rho)(BHETA^(t+1) + mu^t / rho)
        mu^(t+1)    = mu^t + rho (BHETA^(t+1) - THETA^(t+1))

        The algorithm involves a 'ridge regression' update for BHETA, a soft-thresholding (shrinkage) step for THETA
        and a simple linear update for mu.

        NB: this ADMM variant actually uses two penalty parameters (mu1, mu2) instead of a single one.
        '''

        print('\tADMM processing...')

        alpha1 = alpha2 = 0
        if (len(self.reg_params) == 1):
            alpha1 = self.reg_params[0]
            alpha2 = self.reg_params[0]
        elif (len(self.reg_params) == 2):
            alpha1 = self.reg_params[0]
            alpha2 = self.reg_params[1]

        # threshold parameters for the stopping criteria
        if (len(self.thr) == 1):
            thr1 = self.thr[0]
            thr2 = self.thr[0]
        elif (len(self.thr) == 2):
            thr1 = self.thr[0]
            thr2 = self.thr[1]

        # entry condition
        err1 = 10 * thr1
        err2 = 10 * thr2

        start_time = time.time()

        # setting penalty parameters for the ALM
        mu1p = alpha1 * 1 / self.computeLambda()
        print("\t\t-Compute Lambda- Time = %s seconds" %
              (time.time() - start_time))
        mu2p = alpha2 * 1

        mu1 = mu1p
        mu2 = mu2p

        i = 1
        start_time = time.time()
        if self.GPU == True:

            # defining the matrices for the constraint to minimize: C (THETA) and the Lagrange multiplier (lambda2)
            THETA = misc.zeros((self.num_columns, self.num_columns),
                               dtype='float64')
            lambda2 = misc.zeros((self.num_columns, self.num_columns),
                                 dtype='float64')

            gpu_data = gpuarray.to_gpu(self.data)
            P_GPU = linalg.dot(gpu_data, gpu_data, transa='T')

            OP1 = P_GPU
            linalg.scale(np.float32(mu1), OP1)

            OP2 = linalg.eye(self.num_columns)
            linalg.scale(mu2, OP2)

            if self.affine == True:

                print('\t\tGPU affine...')

                OP3 = misc.ones((self.num_columns, self.num_columns),
                                dtype='float64')
                linalg.scale(mu2, OP3)
                lambda3 = misc.zeros((1, self.num_columns), dtype='float64')

                # TODO: because of a problem with scikit-cuda's linalg.inv, we use numpy's np.linalg.inv instead
                A = np.linalg.inv(
                    misc.add(misc.add(OP1.get(), OP2.get()), OP3.get()))

                A_GPU = gpuarray.to_gpu(A)

                while ((err1 > thr1 or err2 > thr2) and i < self.max_iter):

                    _lambda2 = gpuarray.to_gpu(lambda2)
                    _lambda3 = gpuarray.to_gpu(lambda3)

                    linalg.scale(1 / mu2, _lambda2)
                    term_OP2 = gpuarray.to_gpu(_lambda2.get())

                    OP2 = gpuarray.to_gpu(misc.subtract(THETA, term_OP2))
                    linalg.scale(mu2, OP2)

                    OP4 = gpuarray.to_gpu(
                        np.matlib.repmat(_lambda3.get(), self.num_columns, 1))

                    # updating Z
                    BHETA = linalg.dot(
                        A_GPU, misc.add(misc.add(misc.add(OP1, OP2), OP3),
                                        OP4))

                    # deallocating unnecessary GPU variables
                    OP2.gpudata.free()
                    OP4.gpudata.free()
                    _lambda2.gpudata.free()
                    _lambda3.gpudata.free()

                    # updating C
                    THETA = misc.add(BHETA, term_OP2)
                    THETA = self.shrinkL1Lq(THETA.get(), 1 / mu2)
                    THETA = THETA.astype('float64')

                    # updating Lagrange multipliers
                    term_lambda2 = misc.subtract(BHETA, gpuarray.to_gpu(THETA))

                    linalg.scale(mu2, term_lambda2)
                    term_lambda2 = gpuarray.to_gpu(term_lambda2.get())
                    lambda2 = misc.add(lambda2, term_lambda2)  # on GPU

                    term_lambda3 = misc.subtract(
                        misc.ones((1, self.num_columns), dtype='float64'),
                        misc.sum(BHETA, axis=0))
                    linalg.scale(mu2, term_lambda3)
                    term_lambda3 = gpuarray.to_gpu(term_lambda3.get())
                    lambda3 = misc.add(lambda3, term_lambda3)  # on GPU

                    # deallocating unnecessary GPU variables
                    term_OP2.gpudata.free()
                    term_lambda2.gpudata.free()
                    term_lambda3.gpudata.free()

                    err1 = self.errorCoef(BHETA.get(), THETA)
                    err2 = self.errorCoef(np.sum(BHETA.get(), axis=0),
                                          np.ones([1, self.num_columns]))

                    # deallocating unnecessary GPU variables
                    BHETA.gpudata.free()

                    THETA = gpuarray.to_gpu((THETA))

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print(
                            '\t\tIteration = %d, ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e'
                            % (i, err1, err2))
                    i += 1

                THETA = THETA.get()

                Err = [err1, err2]
                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e. \n'
                        % (i, err1, err2))

            else:
                print('\t\tGPU not affine')

                # TODO: because of a problem with scikit-cuda's linalg.inv, we use numpy's np.linalg.inv instead
                A = np.linalg.inv(misc.add(OP1.get(), OP2.get()))
                A_GPU = gpuarray.to_gpu(A)

                while (err1 > thr1 and i < self.max_iter):

                    _lambda2 = gpuarray.to_gpu(lambda2)

                    term_OP2 = THETA
                    linalg.scale(mu2, term_OP2)

                    term_OP2 = misc.subtract(term_OP2, _lambda2)

                    OP2 = gpuarray.to_gpu(term_OP2.get())

                    BHETA = linalg.dot(A_GPU, misc.add(OP1, OP2))

                    linalg.scale(1 / mu2, _lambda2)
                    term_THETA = gpuarray.to_gpu(_lambda2.get())

                    THETA = misc.add(BHETA, term_THETA)
                    THETA = self.shrinkL1Lq(THETA.get(), 1 / mu2)

                    THETA = THETA.astype('float32')

                    # updating Lagrange multipliers
                    term_lambda2 = misc.subtract(BHETA, gpuarray.to_gpu(THETA))
                    linalg.scale(mu2, term_lambda2)
                    term_lambda2 = gpuarray.to_gpu(term_lambda2.get())
                    lambda2 = misc.add(lambda2, term_lambda2)  # on GPU

                    err1 = self.errorCoef(BHETA.get(), THETA)

                    THETA = gpuarray.to_gpu((THETA))

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print('\t\tIteration %5.0f, ||Z - C|| = %2.5e' %
                              (i, err1))
                    i += 1

                THETA = THETA.get()
                Err = [err1, err2]
                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e'
                        % (i, err1))

        else:  #CPU version

            # defining the matrices for the constraint to minimize: C (THETA) and the Lagrange multiplier (lambda2)
            THETA = np.zeros([self.num_columns, self.num_columns])
            lambda2 = np.zeros([self.num_columns, self.num_columns])

            P = self.data.T.dot(self.data)
            OP1 = np.multiply(P, mu1)

            if self.affine == True:

                # INITIALIZATION
                lambda3 = np.zeros(self.num_columns).T

                A = np.linalg.inv(
                    np.multiply(mu1, P) +
                    np.multiply(mu2, np.eye(self.num_columns, dtype=int)) +
                    np.multiply(mu2,
                                np.ones([self.num_columns, self.num_columns])))

                OP3 = np.multiply(
                    mu2, np.ones([self.num_columns, self.num_columns]))

                while ((err1 > thr1 or err2 > thr2) and i < self.max_iter):

                    # updating Bheta
                    OP2 = np.multiply(THETA - np.divide(lambda2, mu2), mu2)
                    OP4 = np.matlib.repmat(lambda3, self.num_columns, 1)
                    BHETA = A.dot(OP1 + OP2 + OP3 + OP4)

                    # updating C
                    THETA = BHETA + np.divide(lambda2, mu2)
                    THETA = self.shrinkL1Lq(THETA, 1 / mu2)

                    # updating Lagrange multipliers
                    lambda2 = lambda2 + np.multiply(mu2, BHETA - THETA)
                    lambda3 = lambda3 + np.multiply(
                        mu2,
                        np.ones([1, self.num_columns]) - np.sum(BHETA, axis=0))

                    err1 = self.errorCoef(BHETA, THETA)
                    err2 = self.errorCoef(np.sum(BHETA, axis=0),
                                          np.ones([1, self.num_columns]))

                    # mu1 = min(mu1 * (1 + 10 ^ -5), 10 ^ 2 * mu1p);
                    # mu2 = min(mu2 * (1 + 10 ^ -5), 10 ^ 2 * mu2p);

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print(
                            '\t\tIteration = %d, ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e'
                            % (i, err1, err2))
                    i += 1

                Err = [err1, err2]

                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e, ||1 - C^T 1|| = %2.5e. \n'
                        % (i, err1, err2))
            else:
                print('\t\tCPU not affine')

                A = np.linalg.inv(
                    OP1 +
                    np.multiply(mu2, np.eye(self.num_columns, dtype=int)))

                while (err1 > thr1 and i < self.max_iter):

                    # updating Z
                    OP2 = np.multiply(mu2, THETA) - lambda2
                    BHETA = A.dot(OP1 + OP2)

                    # updating C
                    THETA = BHETA + np.divide(lambda2, mu2)
                    THETA = self.shrinkL1Lq(THETA, 1 / mu2)

                    # updating Lagrange multipliers
                    lambda2 = lambda2 + np.multiply(mu2, BHETA - THETA)

                    # computing errors
                    err1 = self.errorCoef(BHETA, THETA)

                    # reporting errors
                    if (self.verbose and (i % self.step == 0)):
                        print('\t\tIteration %5.0f, ||Z - C|| = %2.5e' %
                              (i, err1))
                    i += 1

                Err = [err1, err2]
                if (self.verbose):
                    print(
                        '\t\tTerminating ADMM at iteration %5.0f, \n ||Z - C|| = %2.5e'
                        % (i, err1))

        print("\t\t-ADMM- Time = %s seconds" % (time.time() - start_time))

        return THETA, Err
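
For reference, the updates spelled out in the docstring reduce to a few NumPy lines in the plain (non-affine, single-penalty) setting. The sketch below is only illustrative: it uses a plain elementwise soft-thresholding in place of the class's `shrinkL1Lq`, a fixed iteration count instead of the error-based stopping above, and the function names are assumptions.

import numpy as np

def soft_threshold(A, kappa):
    # elementwise shrinkage operator used for the THETA update
    return np.sign(A) * np.maximum(np.abs(A) - kappa, 0.0)

def admm_lasso_sketch(X, Y, lam=0.1, rho=1.0, n_iter=200):
    """Minimal ADMM for 1/2||Y - X*BHETA||_2^2 + lam*||THETA||_1 s.t. BHETA = THETA."""
    n_feat, n_out = X.shape[1], Y.shape[1]
    THETA = np.zeros((n_feat, n_out))
    mu = np.zeros((n_feat, n_out))                         # Lagrange multiplier
    A = np.linalg.inv(X.T @ X + rho * np.eye(n_feat))      # fixed 'ridge' system matrix
    XtY = X.T @ Y
    for _ in range(n_iter):
        BHETA = A @ (XtY + rho * THETA - mu)               # ridge-regression update
        THETA = soft_threshold(BHETA + mu / rho, lam / rho)  # shrinkage update
        mu = mu + rho * (BHETA - THETA)                    # multiplier update
    return THETA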