示例#1
0
    def test_svd_daal_vs_sklearn(rows=1000, columns=1000):
        """Compare DAAL's batch SVD with ``np.linalg.svd`` on random data.

        Both implementations decompose the same array produced by
        ``get_random_array(rows, columns)`` and their singular values must
        agree. When the ``CHECKPERFORMANCE`` environment variable is set,
        the DAAL computation is also required to take no longer than the
        reference computation.

        :param rows: first argument forwarded to ``get_random_array``
        :param columns: second argument forwarded to ``get_random_array``
        """
        indata = get_random_array(rows, columns)
        daal_input = HomogenNumericTable(indata)
        algorithm = svd.Batch()
        algorithm.input.set(svd.data, daal_input)

        # perf_counter() is monotonic, so the measured intervals cannot be
        # distorted by system clock adjustments the way time.time() can.
        start_sklearn = time.perf_counter()
        _U, s, _Vh = np.linalg.svd(indata, full_matrices=False)
        elapsed_sklearn = time.perf_counter() - start_sklearn

        start_daal = time.perf_counter()
        result = algorithm.compute()
        elapsed_daal = time.perf_counter() - start_daal

        # The timing comparison is opt-in because it depends on the machine.
        if os.getenv("CHECKPERFORMANCE") is not None:
            assert elapsed_daal <= elapsed_sklearn

        # The singular values are unpacked as a two-dimensional array;
        # flatten them to one dimension to match the shape of ``s``.
        sigma = getNumpyArray(result.get(svd.singularValues))
        _rows, cols = sigma.shape
        d_sigma = sigma.reshape(cols, )

        assert_array_almost_equal(d_sigma, s)

        print("SVD for matrix[{}][{}]".format(rows, columns))
        print("+ Sklearn SVD: {}".format(elapsed_sklearn))
        # Previously labelled "Sklearn Daal", which misattributed the timing.
        print("+ Daal SVD: {}".format(elapsed_daal))
示例#2
0
    def test_svd_simple():
        """Run DAAL's dense SVD on a fixed 4x2 matrix and check the factors.

        Both singular matrices are requested in packed form. The test checks
        the shapes of the three results and then compares each of them with
        hard-coded reference values.
        """
        source_matrix = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
        table = HomogenNumericTable(source_matrix)
        n_features = source_matrix.shape[1]

        batch = svd.Batch(
            method=svd.defaultDense,
            leftSingularMatrix=svd.requiredInPackedForm,
            rightSingularMatrix=svd.requiredInPackedForm,
        )
        batch.input.set(svd.data, table)
        outcome = batch.compute()

        singular_values = getNumpyArray(outcome.get(svd.singularValues))
        left = getNumpyArray(outcome.get(svd.leftSingularMatrix))
        right = getNumpyArray(outcome.get(svd.rightSingularMatrix))

        # Shape checks: one singular value per input column, a left factor
        # shaped like the input and a square right factor.
        assert singular_values.shape[1] == n_features
        assert source_matrix.shape == left.shape
        assert n_features == right.shape[0] == right.shape[1]

        # Reference values the computed factors are compared against.
        expected_sigma = np.array([[14.269, 0.6268]])
        expected_left = np.array([
            [-0.152, -0.823],
            [-0.350, -0.421],
            [-0.547, -0.020],
            [-0.745, 0.381],
        ])
        expected_right = np.array([
            [-0.641, -0.767],
            [0.767, -0.641],
        ])

        assert_array_almost_equal(expected_sigma, singular_values, decimal=4)
        assert_array_almost_equal(expected_left, left, decimal=3)
        assert_array_almost_equal(expected_right, right, decimal=3)
示例#3
0
文件: svd.py 项目: yxoos/h2o4gpu
    def fit_transform(self, X, y=None):
        '''
        Fit SVD to X and return the transformed data.

        Side effects: stores the left singular matrix in ``self._U``, the
        right singular matrix in ``self._Q`` and the flattened singular
        values in ``self._w``; sets ``self.explained_variance`` and
        ``self.explained_variance_ratio_``.

        :param X: array-like shape n_samples x n_features(n_components)
        TODO@monika: sparse matrix
        :param y: None (ignored)
        :return: the left singular matrix scaled by the singular values
            (``self._U * self._w``)
        '''
        _ = y
        hdd = IInput.HomogenousDaalData(X)
        input_type = hdd.informat

        def column_lambda(input_, components):
            # Keep only the leading ``components`` columns when the input
            # has at least that many; otherwise return it unchanged.
            if components <= input_.shape[1]:
                return input_[:, 0:components]
            return input_

        # NOTE(review): ``hdd`` was built from the untrimmed X above, so the
        # trimmed X below only feeds the ``full_var`` computation - confirm
        # that this is intended.
        if input_type == 'numpy':
            X = column_lambda(X, self.n_components)
        elif input_type == 'pandas':
            # DataFrame.as_matrix() was removed in pandas 1.0; ``.values``
            # yields the same ndarray on both old and new pandas versions.
            X = column_lambda(X.values, self.n_components)
        # Any other input format (CSV): column size is not supported,
        # so X is left untouched.

        Input = hdd.getNumericTable()

        algorithm = svd.Batch(
            method=svd.defaultDense,
            leftSingularMatrix=self.parameters['leftSingularMatrix'],
            rightSingularMatrix=self.parameters['rightSingularMatrix'])
        algorithm.input.set(svd.data, Input)

        # compute SVD decomposition
        result = algorithm.compute()
        U = result.get(svd.leftSingularMatrix)
        Sigma = result.get(svd.singularValues)
        VT = result.get(svd.rightSingularMatrix)

        # transform result to numpy array
        self._U = IInput.getNumpyArray(nT=U)
        self._Q = IInput.getNumpyArray(nT=VT)

        # The singular values are unpacked as a two-dimensional array;
        # flatten them into a one-dimensional vector.
        sigma = IInput.getNumpyArray(nT=Sigma)
        _, cols = sigma.shape
        self._w = sigma.reshape(cols,)

        # Calculate explained variance & explained variance ratio
        X_transformed = self._U * self._w
        self.explained_variance = exp_var = np.var(X_transformed, axis=0)
        # todo @Monika: support csr, crs
        full_var = np.var(X, axis=0).sum()
        self.explained_variance_ratio_ = exp_var / full_var
        return X_transformed
示例#4
0
    def test_svd_simple_check():
        """Decompose a fixed 4x3 matrix with DAAL and rebuild it.

        The product of the left singular matrix, the diagonal matrix of
        singular values and the right singular matrix must reproduce the
        original input.
        """
        original = np.array([[1, 3, 4], [5, 6, 9], [1, 2, 3], [7, 6, 8]])
        table = HomogenNumericTable(original)

        batch = svd.Batch()
        batch.input.set(svd.data, table)
        outcome = batch.compute()

        singular_values = getNumpyArray(outcome.get(svd.singularValues))
        left = getNumpyArray(outcome.get(svd.leftSingularMatrix))
        right = getNumpyArray(outcome.get(svd.rightSingularMatrix))

        # Flatten the two-dimensional singular values into a vector and
        # place them on the diagonal of a square matrix.
        _, n_values = singular_values.shape
        diagonal = np.diag(singular_values.reshape(n_values, ))

        scaled_right = np.dot(diagonal, right)
        rebuilt = np.dot(left, scaled_right)

        assert_array_almost_equal(rebuilt, original)