Example #1
def test_ridge_sample_weights():
    rng = np.random.RandomState(0)

    for solver in ("cholesky", ):
        for n_samples, n_features in ((6, 5), (5, 10)):
            for alpha in (1.0, 1e-2):
                y = rng.randn(n_samples)
                X = rng.randn(n_samples, n_features)
                sample_weight = 1 + rng.rand(n_samples)

                coefs = ridge_regression(X,
                                         y,
                                         alpha=alpha,
                                         sample_weight=sample_weight,
                                         solver=solver)
                # Sample weight can be implemented via a simple rescaling
                # for the square loss.
                coefs2 = ridge_regression(
                    X * np.sqrt(sample_weight)[:, np.newaxis],
                    y * np.sqrt(sample_weight),
                    alpha=alpha,
                    solver=solver)
                assert_array_almost_equal(coefs, coefs2)

                # Test for fit_intercept = True
                est = Ridge(alpha=alpha, solver=solver)
                est.fit(X, y, sample_weight=sample_weight)

                # Check using Newton's Method
                # Quadratic function should be solved in a single step.
                # Initialize
                sample_weight = np.sqrt(sample_weight)
                X_weighted = sample_weight[:, np.newaxis] * (np.column_stack(
                    (np.ones(n_samples), X)))
                y_weighted = y * sample_weight

                # Gradient is (X*coef-y)*X + alpha*coef_[1:]
                # Remove coef since it is initialized to zero.
                grad = -np.dot(y_weighted, X_weighted)

                # Hessian is (X.T*X) + alpha*I except that the first
                # diagonal element should be zero, since there is no
                # penalization of intercept.
                diag = alpha * np.ones(n_features + 1)
                diag[0] = 0.
                hess = np.dot(X_weighted.T, X_weighted)
                hess.flat[::n_features + 2] += diag
                coef_ = -np.dot(linalg.inv(hess), grad)
                assert_almost_equal(coef_[0], est.intercept_)
                assert_array_almost_equal(coef_[1:], est.coef_)
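The rescaling check above relies on the fact that, for the squared loss, minimizing sum_i w_i * (y_i - x_i . coef)^2 + alpha * ||coef||^2 is the same problem as plain ridge on sqrt(w_i) * x_i and sqrt(w_i) * y_i. A minimal self-contained sketch of that equivalence (data, solver and tolerance are illustrative choices, not taken from the test):

import numpy as np
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(6, 5)
y = rng.randn(6)
w = 1 + rng.rand(6)                  # strictly positive sample weights
alpha = 1.0

# weighted fit vs. explicit sqrt(w) rescaling of rows and targets
coef_weighted = ridge_regression(X, y, alpha=alpha, sample_weight=w,
                                 solver="cholesky")
coef_rescaled = ridge_regression(np.sqrt(w)[:, np.newaxis] * X, np.sqrt(w) * y,
                                 alpha=alpha, solver="cholesky")
np.testing.assert_allclose(coef_weighted, coef_rescaled, rtol=1e-8)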
Example #2
def test_ridge_sample_weights():
    rng = np.random.RandomState(0)

    for solver in ("cholesky", ):
        for n_samples, n_features in ((6, 5), (5, 10)):
            for alpha in (1.0, 1e-2):
                y = rng.randn(n_samples)
                X = rng.randn(n_samples, n_features)
                sample_weight = 1 + rng.rand(n_samples)

                coefs = ridge_regression(X, y,
                                         alpha=alpha,
                                         sample_weight=sample_weight,
                                         solver=solver)
                # Sample weight can be implemented via a simple rescaling
                # for the square loss.
                coefs2 = ridge_regression(
                    X * np.sqrt(sample_weight)[:, np.newaxis],
                    y * np.sqrt(sample_weight),
                    alpha=alpha, solver=solver)
                assert_array_almost_equal(coefs, coefs2)

                # Test for fit_intercept = True
                est = Ridge(alpha=alpha, solver=solver)
                est.fit(X, y, sample_weight=sample_weight)

                # Check using Newton's Method
                # Quadratic function should be solved in a single step.
                # Initialize
                sample_weight = np.sqrt(sample_weight)
                X_weighted = sample_weight[:, np.newaxis] * (
                    np.column_stack((np.ones(n_samples), X)))
                y_weighted = y * sample_weight

                # Gradient is (X*coef-y)*X + alpha*coef_[1:]
                # Remove coef since it is initialized to zero.
                grad = -np.dot(y_weighted, X_weighted)

                # Hessian is (X.T*X) + alpha*I except that the first
                # diagonal element should be zero, since there is no
                # penalization of intercept.
                diag = alpha * np.ones(n_features + 1)
                diag[0] = 0.
                hess = np.dot(X_weighted.T, X_weighted)
                hess.flat[::n_features + 2] += diag
                coef_ = - np.dot(linalg.inv(hess), grad)
                assert_almost_equal(coef_[0], est.intercept_)
                assert_array_almost_equal(coef_[1:], est.coef_)
Example #3
def test_ridge_regression_dtype_stability(solver, seed):
    random_state = np.random.RandomState(seed)
    n_samples, n_features = 6, 5
    X = random_state.randn(n_samples, n_features)
    coef = random_state.randn(n_features)
    y = np.dot(X, coef) + 0.01 * random_state.randn(n_samples)
    alpha = 1.0
    results = dict()
    # XXX: Sparse CG seems to be far less numerically stable than the
    # others, maybe we should not enable float32 for this one.
    atol = 1e-3 if solver == "sparse_cg" else 1e-5
    for current_dtype in (np.float32, np.float64):
        results[current_dtype] = ridge_regression(X.astype(current_dtype),
                                                  y.astype(current_dtype),
                                                  alpha=alpha,
                                                  solver=solver,
                                                  random_state=random_state,
                                                  sample_weight=None,
                                                  max_iter=500,
                                                  tol=1e-10,
                                                  return_n_iter=False,
                                                  return_intercept=False)

    assert results[np.float32].dtype == np.float32
    assert results[np.float64].dtype == np.float64
    assert_allclose(results[np.float32], results[np.float64], atol=atol)
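The `solver` and `seed` arguments of this test are supplied by pytest parametrization decorators that the excerpt does not show. A plausible reconstruction (the concrete solver list and seed range are assumptions) looks like:

import pytest

@pytest.mark.parametrize(
    "solver", ["svd", "cholesky", "lsqr", "sparse_cg", "sag", "saga"])
@pytest.mark.parametrize("seed", range(1))
def test_ridge_regression_dtype_stability(solver, seed):
    ...  # body as in the example above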
Example #4
def test_ridge_regression_dtype_stability(solver):
    random_state = np.random.RandomState(0)
    n_samples, n_features = 6, 5
    X = random_state.randn(n_samples, n_features)
    coef = random_state.randn(n_features)
    y = np.dot(X, coef) + 0.01 * random_state.randn(n_samples)
    alpha = 1.0
    rtol = 1e-2 if os.name == 'nt' and _IS_32BIT else 1e-5

    results = dict()
    for current_dtype in (np.float32, np.float64):
        results[current_dtype] = ridge_regression(X.astype(current_dtype),
                                                  y.astype(current_dtype),
                                                  alpha=alpha,
                                                  solver=solver,
                                                  random_state=random_state,
                                                  sample_weight=None,
                                                  max_iter=500,
                                                  tol=1e-10,
                                                  return_n_iter=False,
                                                  return_intercept=False)

    assert results[np.float32].dtype == np.float32
    assert results[np.float64].dtype == np.float64
    assert_allclose(results[np.float32], results[np.float64], rtol=rtol)
Example #5
def test_ridge_sample_weights():
    rng = np.random.RandomState(0)
    alpha = 1.0

    for solver in ("sparse_cg", "dense_cholesky", "lsqr"):
        for n_samples, n_features in ((6, 5), (5, 10)):
            y = rng.randn(n_samples)
            X = rng.randn(n_samples, n_features)
            sample_weight = 1 + rng.rand(n_samples)

            coefs = ridge_regression(X, y, alpha, sample_weight, solver=solver)
            # Sample weight can be implemented via a simple rescaling
            # for the square loss
            coefs2 = ridge_regression(
                X * np.sqrt(sample_weight)[:, np.newaxis], y * np.sqrt(sample_weight), alpha, solver=solver
            )
            assert_array_almost_equal(coefs, coefs2)
Example #6
def test_ridge_sample_weights():
    rng = np.random.RandomState(0)
    alpha = 1.0

    for solver in ("sparse_cg", "dense_cholesky", "lsqr"):
        for n_samples, n_features in ((6, 5), (5, 10)):
            y = rng.randn(n_samples)
            X = rng.randn(n_samples, n_features)
            sample_weight = 1 + rng.rand(n_samples)

            coefs = ridge_regression(X, y, alpha, sample_weight, solver=solver)
            # Sample weight can be implemented via a simple rescaling
            # for the square loss
            coefs2 = ridge_regression(X *
                                      np.sqrt(sample_weight)[:, np.newaxis],
                                      y * np.sqrt(sample_weight),
                                      alpha,
                                      solver=solver)
            assert_array_almost_equal(coefs, coefs2)
Example #7
def test_deprecation_warning_dense_cholesky():
    """Tests if DeprecationWarning is raised at instantiation of estimators
    and when ridge_regression is called"""

    warning_class = DeprecationWarning
    warning_message = ("The name 'dense_cholesky' is deprecated."
                       " Using 'cholesky' instead")
    func1 = lambda: Ridge(solver='dense_cholesky')
    func2 = lambda: RidgeClassifier(solver='dense_cholesky')
    X = np.ones([3, 2])
    y = np.zeros(3)
    func3 = lambda: ridge_regression(X, y, alpha=1, solver='dense_cholesky')

    for func in [func1, func2, func3]:
        assert_warns_message(warning_class, warning_message, func)
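assert_warns_message is a scikit-learn testing utility that checks both the warning class and its message. The same check can be illustrated with only the standard library; the stand-in function below is hypothetical and merely emits the warning that a deprecation-era Ridge(solver='dense_cholesky') would have raised:

import warnings

def deprecated_call():
    # stand-in for Ridge(solver='dense_cholesky'); the real behaviour is version-dependent
    warnings.warn("The name 'dense_cholesky' is deprecated."
                  " Using 'cholesky' instead", DeprecationWarning)

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    deprecated_call()

assert any(issubclass(w.category, DeprecationWarning)
           and "dense_cholesky" in str(w.message) for w in caught)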
Example #8
def test_ridge_regression_check_arguments_validity(return_intercept,
                                                   sample_weight, arr_type,
                                                   solver):
    """check if all combinations of arguments give valid estimations"""

    # test excludes 'svd' solver because it raises exception for sparse inputs

    rng = check_random_state(42)
    X = rng.rand(1000, 3)
    true_coefs = [1, 2, 0.1]
    y = np.dot(X, true_coefs)
    true_intercept = 0.
    if return_intercept:
        true_intercept = 10000.
    y += true_intercept
    X_testing = arr_type(X)

    alpha, atol, tol = 1e-3, 1e-4, 1e-6

    if solver not in ['sag', 'auto'] and return_intercept:
        assert_raises_regex(ValueError,
                            "In Ridge, only 'sag' solver",
                            ridge_regression,
                            X_testing,
                            y,
                            alpha=alpha,
                            solver=solver,
                            sample_weight=sample_weight,
                            return_intercept=return_intercept,
                            tol=tol)
        return

    out = ridge_regression(
        X_testing,
        y,
        alpha=alpha,
        solver=solver,
        sample_weight=sample_weight,
        return_intercept=return_intercept,
        tol=tol,
    )

    if return_intercept:
        coef, intercept = out
        assert_allclose(coef, true_coefs, rtol=0, atol=atol)
        assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
    else:
        assert_allclose(out, true_coefs, rtol=0, atol=atol)
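As in the dtype-stability test, `return_intercept`, `sample_weight`, `arr_type` and `solver` come from pytest parametrization that is not part of the excerpt. One plausible set of decorators (the concrete values are assumptions) would be:

import numpy as np
import pytest
from scipy import sparse

@pytest.mark.parametrize("return_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, np.ones(1000)])
@pytest.mark.parametrize("arr_type", [np.asarray, sparse.csr_matrix])
@pytest.mark.parametrize("solver",
                         ["auto", "sparse_cg", "cholesky", "lsqr", "sag", "saga"])
def test_ridge_regression_check_arguments_validity(return_intercept,
                                                   sample_weight, arr_type,
                                                   solver):
    ...  # body as in the example above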
Example #9
def test_ridge_regression_check_arguments_validity(return_intercept,
                                                   sample_weight, arr_type,
                                                   solver):
    """check if all combinations of arguments give valid estimations"""

    # test excludes 'svd' solver because it raises exception for sparse inputs

    rng = check_random_state(42)
    X = rng.rand(1000, 3)
    true_coefs = [1, 2, 0.1]
    y = np.dot(X, true_coefs)
    true_intercept = 0.
    if return_intercept:
        true_intercept = 10000.
    y += true_intercept
    X_testing = arr_type(X)

    alpha, atol, tol = 1e-3, 1e-4, 1e-6

    if solver not in ['sag', 'auto'] and return_intercept:
        assert_raises_regex(ValueError,
                            "In Ridge, only 'sag' solver",
                            ridge_regression, X_testing, y,
                            alpha=alpha,
                            solver=solver,
                            sample_weight=sample_weight,
                            return_intercept=return_intercept,
                            tol=tol)
        return

    out = ridge_regression(X_testing, y, alpha=alpha,
                           solver=solver,
                           sample_weight=sample_weight,
                           return_intercept=return_intercept,
                           tol=tol,
                           )

    if return_intercept:
        coef, intercept = out
        assert_allclose(coef, true_coefs, rtol=0, atol=atol)
        assert_allclose(intercept, true_intercept, rtol=0, atol=atol)
    else:
        assert_allclose(out, true_coefs, rtol=0, atol=atol)
Example #10
 def func():
     X = np.eye(3)
     y = np.ones(3)
     ridge_regression(X, y, alpha=1., solver=wrong_solver)
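`wrong_solver` is a variable captured from the enclosing test and is not defined in this excerpt; the helper is meant to be wrapped in an exception check. A self-contained sketch with a made-up solver name (the name and the use of pytest.raises are mine):

import numpy as np
import pytest
from sklearn.linear_model import ridge_regression

def test_unknown_solver_raises():
    X = np.eye(3)
    y = np.ones(3)
    with pytest.raises(ValueError):
        ridge_regression(X, y, alpha=1., solver="not_a_solver")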
Example #11
 def solT(X, y):
     return ridge_regression(X, y, alpha=0., solver="cholesky").T
Example #12
    def _fit(self, X, y, sample_weight=None, incremental=False):
        """Fit the model to the data X and target y."""
        # Validate input params
        if self.n_hidden <= 0:
            raise ValueError("n_hidden must be > 0, got %s." % self.n_hidden)
        if self.C <= 0.0:
            raise ValueError("C must be > 0, got %s." % self.C)
        if self.activation not in ACTIVATIONS:
            raise ValueError("The activation %s is not supported. Supported "
                             "activation are %s." %
                             (self.activation, ACTIVATIONS))

        # Initialize public attributes
        if not hasattr(self, 'classes_'):
            self.classes_ = None
        if not hasattr(self, 'coef_hidden_'):
            self.coef_hidden_ = None

        # Initialize private attributes
        if not hasattr(self, '_HT_H_accumulated'):
            self._HT_H_accumulated = None

        X, y = check_X_y(X,
                         y,
                         accept_sparse=['csr', 'csc', 'coo'],
                         dtype=np.float64,
                         order="C",
                         multi_output=True)

        # This outputs a warning when a 1d array is expected
        if y.ndim == 2 and y.shape[1] == 1:
            y = column_or_1d(y, warn=True)

        # Classification
        if isinstance(self, ClassifierMixin):
            self.label_binarizer_.fit(y)

            if self.classes_ is None or not incremental:
                self.classes_ = self.label_binarizer_.classes_
                # if sample_weight is None:
                #     sample_weight = compute_sample_weight(self.class_weight,
                #                                           self.classes_, y)
            else:
                classes = self.label_binarizer_.classes_
                if not np.all(np.in1d(classes, self.classes_)):
                    raise ValueError("`y` has classes not in `self.classes_`."
                                     " `self.classes_` has %s. 'y' has %s." %
                                     (self.classes_, classes))

            y = self.label_binarizer_.transform(y)

        # Ensure y is 2D
        if y.ndim == 1:
            y = np.reshape(y, (-1, 1))

        n_samples, n_features = X.shape
        self.n_outputs_ = y.shape[1]

        # Step (1/2): Compute the hidden layer coefficients
        if (self.coef_hidden_ is None
                or (not incremental and not self.warm_start)):
            # Randomize and scale the input-to-hidden coefficients
            self._init_weights(n_features)

        # Step (2/2): Compute hidden-to-output coefficients
        if self.batch_size is None:
            # Run the least-square algorithm on the whole dataset
            batch_size = n_samples
        else:
            # Run the recursive least-square algorithm on mini-batches
            batch_size = self.batch_size

        batches = gen_batches(n_samples, batch_size)

        # (First time call) Run the least-square algorithm on batch 0
        if not incremental or self._HT_H_accumulated is None:
            batch_slice = next(batches)
            H_batch = self._compute_hidden_activations(X[batch_slice])

            # Get sample weights for the batch
            if sample_weight is None:
                sw = None
            else:
                sw = sample_weight[batch_slice]

            # beta_{0} = inv(H_{0}^T H_{0} + (1. / C) * I) * H_{0}.T y_{0}
            self.coef_output_ = ridge_regression(H_batch,
                                                 y[batch_slice],
                                                 1. / self.C,
                                                 sample_weight=sw).T

            # Initialize K if this is batch based or partial_fit
            if self.batch_size is not None or incremental:
                # K_{0} = H_{0}^T * W * H_{0}
                weighted_H_batch = _multiply_weights(H_batch, sw)
                self._HT_H_accumulated = safe_sparse_dot(
                    H_batch.T, weighted_H_batch)

            if self.verbose:
                y_scores = self._decision_scores(X[batch_slice])

                if self.batch_size is None:
                    verbose_string = "Training mean squared error ="
                else:
                    verbose_string = "Batch 0, Training mean squared error ="

                print("%s %f" %
                      (verbose_string,
                       mean_squared_error(
                           y[batch_slice], y_scores, sample_weight=sw)))

        # Run the least-square algorithm on batch 1, 2, ..., n
        for batch, batch_slice in enumerate(batches):
            # Compute hidden activations H_{i} for batch i
            H_batch = self._compute_hidden_activations(X[batch_slice])

            # Get sample weights (sw) for the batch
            if sample_weight is None:
                sw = None
            else:
                sw = sample_weight[batch_slice]

            weighted_H_batch = _multiply_weights(H_batch, sw)

            # Update K_{i+1} by H_{i}^T * W * H_{i}
            self._HT_H_accumulated += safe_sparse_dot(H_batch.T,
                                                      weighted_H_batch)

            # Update beta_{i+1} by
            # K_{i+1}^{-1} * H_{i+1}^T * W * (y_{i+1} - H_{i+1} * beta_{i})
            y_batch = y[batch_slice] - safe_sparse_dot(H_batch,
                                                       self.coef_output_)

            weighted_y_batch = _multiply_weights(y_batch, sw)
            Hy_batch = safe_sparse_dot(H_batch.T, weighted_y_batch)

            # Update hidden-to-output coefficients
            regularized_HT_H = self._HT_H_accumulated.copy()
            regularized_HT_H.flat[::self.n_hidden + 1] += 1. / self.C

            # It is safe to use linalg.solve (instead of linalg.lstsq
            # which is slow) since it is highly unlikely that
            # regularized_HT_H is singular due to the random
            # projection of the first layer and 'C' regularization being
            # not dangerously large.
            self.coef_output_ += linalg.solve(regularized_HT_H,
                                              Hy_batch,
                                              sym_pos=True,
                                              overwrite_a=True,
                                              overwrite_b=True)
            if self.verbose:
                y_scores = self._decision_scores(X[batch_slice])
                print("Batch %d, Training mean squared error = %f" %
                      (batch + 1,
                       mean_squared_error(
                           y[batch_slice], y_scores, sample_weight=sw)))
        return self
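The comments in `_fit` describe a recursive least-squares (online ridge) update of the hidden-to-output weights: batch 0 is solved directly, and every later batch updates the accumulated Gram matrix K and the coefficients. A minimal dense-array sketch of those two formulas, with sample weights omitted and helper names of my own choosing:

import numpy as np

def rls_init(H0, y0, C):
    # beta_0 = inv(H_0^T H_0 + (1/C) I) H_0^T y_0 -- ridge solution on batch 0
    K = H0.T @ H0
    beta = np.linalg.solve(K + np.eye(K.shape[0]) / C, H0.T @ y0)
    return K, beta

def rls_update(K, beta, H, y, C):
    # K_{i+1} = K_i + H_{i+1}^T H_{i+1}
    K = K + H.T @ H
    # beta_{i+1} = beta_i + (K_{i+1} + (1/C) I)^{-1} H_{i+1}^T (y_{i+1} - H_{i+1} beta_i)
    beta = beta + np.linalg.solve(K + np.eye(K.shape[0]) / C,
                                  H.T @ (y - H @ beta))
    return K, beta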
Example #13
    def _fit(self, X, y, sample_weight=None, incremental=False):
        """Fit the model to the data X and target y."""
        # Validate input params
        if self.n_hidden <= 0:
            raise ValueError("n_hidden must be > 0, got %s." % self.n_hidden)
        if self.C <= 0.0:
            raise ValueError("C must be > 0, got %s." % self.C)
        if self.activation not in ACTIVATIONS:
            raise ValueError("The activation %s is not supported. Supported "
                             "activation are %s." % (self.activation,
                                                     ACTIVATIONS))

        # Initialize public attributes
        if not hasattr(self, 'classes_'):
            self.classes_ = None
        if not hasattr(self, 'coef_hidden_'):
            self.coef_hidden_ = None

        # Initialize private attributes
        if not hasattr(self, '_HT_H_accumulated'):
            self._HT_H_accumulated = None

        X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'],
                         dtype=np.float64, order="C", multi_output=True)

        # This outputs a warning when a 1d array is expected
        if y.ndim == 2 and y.shape[1] == 1:
            y = column_or_1d(y, warn=True)

        # Classification
        if isinstance(self, ClassifierMixin):
            self.label_binarizer_.fit(y)

            if self.classes_ is None or not incremental:
                self.classes_ = self.label_binarizer_.classes_
                if sample_weight is None:
                    sample_weight = compute_sample_weight(self.class_weight,
                                                          self.classes_, y)
            else:
                classes = self.label_binarizer_.classes_
                if not np.all(np.in1d(classes, self.classes_)):
                    raise ValueError("`y` has classes not in `self.classes_`."
                                     " `self.classes_` has %s. 'y' has %s." %
                                     (self.classes_, classes))

            y = self.label_binarizer_.transform(y)

        # Ensure y is 2D
        if y.ndim == 1:
            y = np.reshape(y, (-1, 1))

        n_samples, n_features = X.shape
        self.n_outputs_ = y.shape[1]

        # Step (1/2): Compute the hidden layer coefficients
        if (self.coef_hidden_ is None or (not incremental and
                                          not self.warm_start)):
            # Randomize and scale the input-to-hidden coefficients
            self._init_weights(n_features)

        # Step (2/2): Compute hidden-to-output coefficients
        if self.batch_size is None:
            # Run the least-square algorithm on the whole dataset
            batch_size = n_samples
        else:
            # Run the recursive least-square algorithm on mini-batches
            batch_size = self.batch_size

        batches = gen_batches(n_samples, batch_size)

        # (First time call) Run the least-square algorithm on batch 0
        if not incremental or self._HT_H_accumulated is None:
            batch_slice = next(batches)
            H_batch = self._compute_hidden_activations(X[batch_slice])

            # Get sample weights for the batch
            if sample_weight is None:
                sw = None
            else:
                sw = sample_weight[batch_slice]

            # beta_{0} = inv(H_{0}^T H_{0} + (1. / C) * I) * H_{0}.T y_{0}
            self.coef_output_ = ridge_regression(H_batch, y[batch_slice],
                                                 1. / self.C,
                                                 sample_weight=sw).T

            # Initialize K if this is batch based or partial_fit
            if self.batch_size is not None or incremental:
                # K_{0} = H_{0}^T * W * H_{0}
                weighted_H_batch = _multiply_weights(H_batch, sw)
                self._HT_H_accumulated = safe_sparse_dot(H_batch.T,
                                                         weighted_H_batch)

            if self.verbose:
                y_scores = self._decision_scores(X[batch_slice])

                if self.batch_size is None:
                    verbose_string = "Training mean squared error ="
                else:
                    verbose_string = "Batch 0, Training mean squared error ="

                print("%s %f" % (verbose_string,
                                 mean_squared_error(y[batch_slice], y_scores,
                                                    sample_weight=sw)))

        # Run the least-square algorithm on batch 1, 2, ..., n
        for batch, batch_slice in enumerate(batches):
            # Compute hidden activations H_{i} for batch i
            H_batch = self._compute_hidden_activations(X[batch_slice])

            # Get sample weights (sw) for the batch
            if sample_weight is None:
                sw = None
            else:
                sw = sample_weight[batch_slice]

            weighted_H_batch = _multiply_weights(H_batch, sw)

            # Update K_{i+1} by H_{i}^T * W * H_{i}
            self._HT_H_accumulated += safe_sparse_dot(H_batch.T,
                                                      weighted_H_batch)

            # Update beta_{i+1} by
            # K_{i+1}^{-1} * H_{i+1}^T * W * (y_{i+1} - H_{i+1} * beta_{i})
            y_batch = y[batch_slice] - safe_sparse_dot(H_batch,
                                                       self.coef_output_)

            weighted_y_batch = _multiply_weights(y_batch, sw)
            Hy_batch = safe_sparse_dot(H_batch.T, weighted_y_batch)

            # Update hidden-to-output coefficients
            regularized_HT_H = self._HT_H_accumulated.copy()
            regularized_HT_H.flat[::self.n_hidden + 1] += 1. / self.C

            # It is safe to use linalg.solve (instead of linalg.lstsq
            # which is slow) since it is highly unlikely that
            # regularized_HT_H is singular due to the random
            # projection of the first layer and 'C' regularization being
            # not dangerously large.
            self.coef_output_ += linalg.solve(regularized_HT_H, Hy_batch,
                                              sym_pos=True, overwrite_a=True,
                                              overwrite_b=True)
            if self.verbose:
                y_scores = self._decision_scores(X[batch_slice])
                print("Batch %d, Training mean squared error = %f" %
                      (batch + 1, mean_squared_error(y[batch_slice], y_scores,
                                                     sample_weight=sw)))
        return self
Example #14
 def solT(X, y):
     return ridge_regression(X, y, alpha=0.).T
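With alpha=0. the call reduces to an unpenalized least-squares fit, so the transposed result can be compared against scipy's lstsq. A small usage sketch (the data shapes and tolerance are illustrative):

import numpy as np
from scipy import linalg
from sklearn.linear_model import ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(10, 4)
Y = rng.randn(10, 2)                          # multi-output target, so the transpose matters

coef_T = ridge_regression(X, Y, alpha=0.).T   # shape (n_features, n_outputs)
coef_lstsq, _, _, _ = linalg.lstsq(X, Y)      # ordinary least squares
np.testing.assert_allclose(coef_T, coef_lstsq, atol=1e-8)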