Example #1
    def test_low_variance(self):
        """Low variance of the initial embedding is very important for the
        convergence of tSNE."""
        # Cycle through various initializations
        initializations = ['random', 'pca']
        allowed = 1e-3

        for init in initializations:
            tsne = TSNE(initialization=init, perplexity=2)
            embedding = tsne.prepare_initial(self.x)
            np.testing.assert_array_less(
                np.var(embedding, axis=0), allowed,
                'using the `%s` initialization' % init)
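The docstring points at a real requirement: optimization expects the initial embedding to start with near-zero spread. Below is a minimal sketch of such a rescaling step, assuming a NumPy array input; the function name and the 1e-4 target are illustrative (openTSNE performs an equivalent rescaling internally):

import numpy as np

def rescale_initialization(embedding, target_std=1e-4):
    """Shrink an initial embedding so every axis has tiny variance."""
    # Scaling by the spread of the first component keeps the relative
    # geometry intact while pushing the variance well below the test's
    # 1e-3 bound.
    return embedding * (target_std / np.std(embedding[:, 0]))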
Example #2
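This snippet omits its imports; a sketch of the likely header, assuming the openTSNE package layout (`get_mouse_60k`, `ErrorApproximations`, and `plot` are local benchmark helpers, kept as-is):

import matplotlib.pyplot as plt

from openTSNE import TSNE
from openTSNE.callbacks import ErrorLogger
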
def check_error_approx():
    x, y = get_mouse_60k(1500)

    tsne = TSNE(
        perplexity=20,
        learning_rate=100,
        early_exaggeration=12,
        n_jobs=4,
        theta=0.5,
        initialization='pca',
        metric='euclidean',
        n_components=2,
        n_iter=750,
        early_exaggeration_iter=250,
        neighbors='exact',
        negative_gradient_method='bh',
        min_num_intervals=10,
        ints_in_interval=2,
        late_exaggeration_iter=0,
        late_exaggeration=4,
        callbacks=ErrorLogger(),
    )
    embedding = tsne.prepare_initial(x, initialization='random')

    errors = ErrorApproximations(embedding.affinities.P)
    logger = ErrorLogger()
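    # Early exaggeration phase: exaggerated attractive forces pull
    # would-be clusters together.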
    embedding.optimize(
        250,
        exaggeration=12,
        callbacks=[errors, logger],
        callbacks_every_iters=5,
        inplace=True,
    )
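    # Regular phase: run the remaining iterations without exaggeration.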
    embedding.optimize(
        750,
        exaggeration=None,
        callbacks=[errors, logger],
        callbacks_every_iters=5,
        inplace=True,
    )
    errors.report()

    plot(embedding, y)

    # Compare the error of each gradient approximation on a log scale
    iterations = list(range(len(errors.exact_errors)))
    plt.semilogy(iterations, errors.exact_errors, label='Exact')
    plt.semilogy(iterations, errors.bh_errors, label='BH')
    plt.semilogy(iterations, errors.fft_errors, label='FFT')
    plt.legend()
    plt.show()
Example #3
    def test_embedding_optimize(self, param_name, param_value,
                                gradient_descent):
        # type: (str, Any, MagicMock) -> None
        # Make sure mock still conforms to signature
        gradient_descent.return_value = (1, MagicMock())

        # `optimize` requires `n_iter` to be specified
        params = {'n_iter': 50, param_name: param_value}

        tsne = TSNE()
        embedding = tsne.prepare_initial(self.x)
        embedding.optimize(**params, inplace=True)

        self.assertEqual(1, gradient_descent.call_count)
        check_call_contains_kwargs(gradient_descent.mock_calls[0], params)
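For context, the `gradient_descent` mock in the signature implies the optimizer is patched in around the test. A sketch of that wiring, where the patch target and test name are assumptions (the real suite also parameterizes `param_name` and `param_value`):

from unittest.mock import MagicMock, patch

@patch('openTSNE.tsne.gradient_descent')  # assumed patch target
def test_optimize_forwards_kwargs(self, gradient_descent):
    gradient_descent.return_value = (1, MagicMock())
    # ... body as in `test_embedding_optimize` above ...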
Example #4
    def test_iris(self):
        iris = datasets.load_iris()
        x, y = iris['data'], iris['target']

        # Evaluate tSNE optimization using a KNN classifier
        knn = KNeighborsClassifier(n_neighbors=10)
        tsne = TSNE(perplexity=30, initialization='random', random_state=0)

        # Prepare a random initialization
        embedding = tsne.prepare_initial(x)

        # KNN should do poorly on a random initialization
        knn.fit(embedding, y)
        predictions = knn.predict(embedding)
        self.assertTrue(accuracy_score(predictions, y) < .5)

        # Optimize the embedding for a small number of steps so tests run fast
        embedding.optimize(50, inplace=True)

        # Similar points should be grouped together, therefore KNN should do well
        knn.fit(embedding, y)
        predictions = knn.predict(embedding)
        self.assertTrue(accuracy_score(predictions, y) > .95)
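For comparison, the one-shot fit API produces the same kind of embedding without the explicit prepare_initial/optimize steps; a minimal sketch, assuming the openTSNE package these tests exercise:

from openTSNE import TSNE
from sklearn import datasets

iris = datasets.load_iris()
embedding = TSNE(perplexity=30, initialization='random',
                 random_state=0).fit(iris['data'])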