Example No. 1
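The test below assumes roughly these imports. The _gradient_descent import path is an assumption based on the older scikit-learn layout this snippet appears to come from (newer releases moved it to sklearn.manifold._t_sne):

import sys
from io import StringIO

import numpy as np
from numpy.testing import assert_equal  # older scikit-learn tests pulled this from sklearn.utils.testing

from sklearn.manifold.t_sne import _gradient_descent  # assumed path, see note above
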
def test_gradient_descent_stops():
    # Test stopping conditions of gradient descent.
    class ObjectiveSmallGradient:
        def __init__(self):
            self.it = -1

        def __call__(self, _, compute_error=True):
            self.it += 1
            return (10 - self.it) / 10.0, np.array([1e-5])

    def flat_function(_, compute_error=True):
        return 0.0, np.ones(1)

    # Gradient norm
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100,
            n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=1e-5, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert_equal(error, 1.0)
    assert_equal(it, 0)
    assert("gradient norm" in out)

    # Maximum number of iterations without improvement
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            flat_function, np.zeros(1), 0, n_iter=100,
            n_iter_without_progress=10, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=0.0, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert_equal(error, 0.0)
    assert_equal(it, 11)
    assert("did not make any progress" in out)

    # Maximum number of iterations
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        _, error, it = _gradient_descent(
            ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11,
            n_iter_without_progress=100, momentum=0.0, learning_rate=0.0,
            min_gain=0.0, min_grad_norm=0.0, verbose=2)
    finally:
        out = sys.stdout.getvalue()
        sys.stdout.close()
        sys.stdout = old_stdout
    assert_equal(error, 0.0)
    assert_equal(it, 10)
    assert("Iteration 10" in out)
Example No. 2
    def _tsne(self,
              P,
              degrees_of_freedom,
              n_samples,
              random_state,
              X_embedded=None,
              neighbors=None,
              skip_num_points=0):
        """Runs t-SNE."""
        # t-SNE minimizes the Kullback-Leibler divergence of the Gaussians P
        # and the Student's t-distributions Q. The optimization algorithm that
        # we use is batch gradient descent with three stages:
        # * early exaggeration with momentum 0.5
        # * early exaggeration with momentum 0.8
        # * final optimization with momentum 0.8
        # The embedding is initialized with iid samples from Gaussians with
        # standard deviation 1e-4.

        if X_embedded is None:
            # Initialize embedding randomly
            X_embedded = 1e-4 * random_state.randn(n_samples,
                                                   self.n_components)
        params = X_embedded.ravel()

        opt_args = {
            "n_iter": 50,
            "momentum": 0.5,
            "it": 0,
            "learning_rate": self.learning_rate,
            "n_iter_without_progress": self.n_iter_without_progress,
            "verbose": self.verbose,
            "n_iter_check": 25,
            "kwargs": dict(skip_num_points=skip_num_points)
        }
        if self.method == 'barnes_hut':
            m = "Must provide an array of neighbors to use Barnes-Hut"
            assert neighbors is not None, m
            obj_func = tsne._kl_divergence_bh
            objective_error = tsne._kl_divergence_error
            sP = squareform(P).astype(np.float32)
            neighbors = neighbors.astype(np.int64)
            args = [
                sP, neighbors, degrees_of_freedom, n_samples, self.n_components
            ]
            opt_args['args'] = args
            opt_args['min_grad_norm'] = 1e-3
            # modified: uncomment the next line to restore the n_iter_without_progress override
            print('modified t-SNE')
            # opt_args['n_iter_without_progress'] = 30
            # Don't always calculate the cost since that calculation
            # can be nearly as expensive as the gradient
            opt_args['objective_error'] = objective_error
            opt_args['kwargs']['angle'] = self.angle
            opt_args['kwargs']['verbose'] = self.verbose
        else:
            obj_func = tsne._kl_divergence
            opt_args['args'] = [
                P, degrees_of_freedom, n_samples, self.n_components
            ]
            opt_args['min_error_diff'] = 0.0
            opt_args['min_grad_norm'] = self.min_grad_norm

        # Early exaggeration
        P *= self.early_exaggeration

        params, kl_divergence, it = tsne._gradient_descent(
            obj_func, params, **opt_args)
        opt_args['n_iter'] = 100
        opt_args['momentum'] = 0.8
        opt_args['it'] = it + 1
        params, kl_divergence, it = tsne._gradient_descent(
            obj_func, params, **opt_args)
        if self.verbose:
            print("[t-SNE] KL divergence after %d iterations with early "
                  "exaggeration: %f" % (it + 1, kl_divergence))
        # Save the final number of iterations
        self.n_iter_final = it

        # Final optimization
        P /= self.early_exaggeration
        opt_args['n_iter'] = self.n_iter
        opt_args['it'] = it + 1
        params, kl_divergence, it = tsne._gradient_descent(
            obj_func, params, **opt_args)

        if self.verbose:
            print("[t-SNE] Error after %d iterations: %f" %
                  (it + 1, kl_divergence))

        X_embedded = params.reshape(n_samples, self.n_components)
        self.kl_divergence_ = kl_divergence

        return X_embedded
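
In the Barnes-Hut branch above, P arrives in condensed form (one entry per pair i < j, the scipy squareform convention) and squareform expands it back into the full symmetric n_samples x n_samples matrix before the Barnes-Hut objective sees it. A toy illustration of just that conversion, with made-up values:

import numpy as np
from scipy.spatial.distance import squareform

n_samples = 4
# condensed form: n_samples * (n_samples - 1) / 2 entries, one per pair (i, j) with i < j
P_condensed = np.array([0.10, 0.20, 0.05, 0.25, 0.15, 0.25])
P_square = squareform(P_condensed)  # shape (4, 4), symmetric, zeros on the diagonal
print(P_square.shape, np.allclose(P_square, P_square.T))
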
Example No. 3
    def _tsne(self, P, degrees_of_freedom, n_samples, random_state,
              X_embedded=None, neighbors=None, skip_num_points=0):
        """Runs t-SNE."""
        # t-SNE minimizes the Kullback-Leibler divergence of the Gaussians P
        # and the Student's t-distributions Q. The optimization algorithm that
        # we use is batch gradient descent with three stages:
        # * early exaggeration with momentum 0.5
        # * early exaggeration with momentum 0.8
        # * final optimization with momentum 0.8
        # The embedding is initialized with iid samples from Gaussians with
        # standard deviation 1e-4.

        if X_embedded is None:
            # Initialize embedding randomly
            X_embedded = 1e-4 * random_state.randn(n_samples,
                                                   self.n_components)
        params = X_embedded.ravel()

        opt_args = {"n_iter": 50, "momentum": 0.5, "it": 0,
                    "learning_rate": self.learning_rate,
                    "n_iter_without_progress": self.n_iter_without_progress,
                    "verbose": self.verbose, "n_iter_check": 25,
                    "kwargs": dict(skip_num_points=skip_num_points)}
        if self.method == 'barnes_hut':
            m = "Must provide an array of neighbors to use Barnes-Hut"
            assert neighbors is not None, m
            obj_func = tsne._kl_divergence_bh
            objective_error = tsne._kl_divergence_error
            sP = squareform(P).astype(np.float32)
            neighbors = neighbors.astype(np.int64)
            args = [sP, neighbors, degrees_of_freedom, n_samples,
                    self.n_components]
            opt_args['args'] = args
            opt_args['min_grad_norm'] = 1e-3
            # modified: uncomment the next line to restore the n_iter_without_progress override
            print('modified t-SNE')
            # opt_args['n_iter_without_progress'] = 30
            # Don't always calculate the cost since that calculation
            # can be nearly as expensive as the gradient
            opt_args['objective_error'] = objective_error
            opt_args['kwargs']['angle'] = self.angle
            opt_args['kwargs']['verbose'] = self.verbose
        else:
            obj_func = tsne._kl_divergence
            opt_args['args'] = [P, degrees_of_freedom, n_samples,
                                self.n_components]
            opt_args['min_error_diff'] = 0.0
            opt_args['min_grad_norm'] = self.min_grad_norm

        # Early exaggeration
        P *= self.early_exaggeration

        params, kl_divergence, it = tsne._gradient_descent(
            obj_func, params, **opt_args)
        opt_args['n_iter'] = 100
        opt_args['momentum'] = 0.8
        opt_args['it'] = it + 1
        params, kl_divergence, it = tsne._gradient_descent(
            obj_func, params, **opt_args)
        if self.verbose:
            print("[t-SNE] KL divergence after %d iterations with early "
                  "exaggeration: %f" % (it + 1, kl_divergence))
        # Save the final number of iterations
        self.n_iter_final = it

        # Final optimization
        P /= self.early_exaggeration
        opt_args['n_iter'] = self.n_iter
        opt_args['it'] = it + 1
        params, kl_divergence, it = tsne._gradient_descent(
            obj_func, params, **opt_args)

        if self.verbose:
            print("[t-SNE] Error after %d iterations: %f"
                  % (it + 1, kl_divergence))

        X_embedded = params.reshape(n_samples, self.n_components)
        self.kl_divergence_ = kl_divergence

        return X_embedded
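
Both _tsne examples optimize the quantity named in their comments: the Kullback-Leibler divergence between the input-space joint probabilities P and the embedding-space joint probabilities Q, where Q comes from a Student's t kernel. The sketch below is not scikit-learn's _kl_divergence; it only spells the formula out on toy data (one degree of freedom, the usual 2-D embedding case), with made-up values.

import numpy as np

def student_t_q(Y):
    # Q_ij proportional to (1 + ||y_i - y_j||^2)^-1, zero diagonal, normalized to sum to 1
    d2 = np.sum((Y[:, None, :] - Y[None, :, :]) ** 2, axis=-1)
    q = 1.0 / (1.0 + d2)
    np.fill_diagonal(q, 0.0)
    return q / q.sum()

def kl_divergence(P, Q, eps=1e-12):
    # sum over i != j of P_ij * log(P_ij / Q_ij), skipping entries where P_ij == 0
    mask = P > 0
    return float(np.sum(P[mask] * np.log(P[mask] / np.maximum(Q[mask], eps))))

rng = np.random.RandomState(0)
Y = 1e-4 * rng.randn(3, 2)          # the same kind of tiny random init used in the examples
P = np.array([[0.00, 0.25, 0.15],
              [0.25, 0.00, 0.10],
              [0.15, 0.10, 0.00]])  # toy symmetric joint probabilities summing to 1
print(kl_divergence(P, student_t_q(Y)))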