def test_can_pass_callbacks_to_tsne_object(self):
    """`TSNE` should accept callbacks as a single callable or as a list, and
    should honour a callback reassigned on the object before fitting."""
    first_cb, second_cb = MagicMock(), MagicMock()
    # A lone callback must not look like a collection of callbacks, so strip
    # the iteration protocol that `MagicMock` provides by default.
    for mock_cb in (first_cb, second_cb):
        del mock_cb.__iter__

    # Run a single optimization step and report after every iteration, so
    # each fit is expected to trigger exactly one callback invocation.
    single_step = dict(callbacks_every_iters=1, early_exaggeration_iter=0, n_iter=1)

    # A bare callback and a list of callbacks should be treated the same way
    for callbacks in (first_cb, [first_cb]):
        TSNE(callbacks=callbacks, **single_step).fit(self.x)
        self.assertEqual(first_cb.call_count, 1)
        first_cb.reset_mock()

    # Replacing the callback on the object should take effect on `fit`
    tsne = TSNE(callbacks=first_cb, **single_step)
    tsne.callbacks = second_cb
    tsne.fit(self.x)

    first_cb.assert_not_called()
    self.assertEqual(second_cb.call_count, 1)
def test_same_results_on_fixed_random_state_pca_init(self):
    """Two PCA-initialized fits sharing a random state must agree exactly."""
    # Build and fit two independent, identically configured models in turn
    first_run, second_run = [
        TSNE(random_state=1, initialization='pca').fit(self.x)
        for _ in range(2)
    ]

    np.testing.assert_array_equal(
        first_run,
        second_run,
        err_msg='Same random state produced different initial embeddings',
    )
def test_unfitted_pca_model(self):
    """Using PCA initialization in `transform` should fail when the initial
    embedding was *not* initialized with PCA."""
    # Fit with random initialization, so the embedding under test was built
    # without the `pca` initialization that `transform` is asked for below.
    tsne = TSNE(initialization='random')
    embedding = tsne.fit(self.x)

    # The test expects an `AssertionError`; `msg` is reported if none is raised
    with self.assertRaises(
        AssertionError,
        msg='Transforming using `pca` init on embedding that did not use '
            '`pca` init did not fail',
    ):
        embedding.transform(self.x_test, initialization='pca')
def transform(n_jobs=4, grad='bh', neighbors='approx'):
    """Time a t-SNE fit on a training split and a `transform` of held-out data.

    The data returned by `get_mnist(20000)` is split 67/33 into train and
    test parts. The training part is embedded with `TSNE.fit`, the test part
    is placed into that embedding with `embedding.transform`, both timings
    are printed and the two results are drawn side by side.

    Parameters
    ----------
    n_jobs : int
        Passed to `TSNE` as `n_jobs`.
    grad : str
        Passed to `TSNE` as `negative_gradient_method`.
    neighbors : str
        Passed to `TSNE` as `neighbors`.
    """
    # iris = datasets.load_iris()
    # x, y = iris['data'], iris['target']
    x, y = get_mnist(20000)

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.33, random_state=42)

    tsne = TSNE(
        n_components=2, perplexity=30, learning_rate=100, early_exaggeration=12,
        n_jobs=n_jobs, theta=0.5, initialization='random', metric='euclidean',
        n_iter=750, early_exaggeration_iter=250, neighbors=neighbors,
        negative_gradient_method=grad, min_num_intervals=10, ints_in_interval=2,
        late_exaggeration_iter=0, late_exaggeration=4, callbacks=[ErrorLogger()],
    )

    # Only the fit itself is timed; model construction happens beforehand
    start = time.time()
    embedding = tsne.fit(x_train)
    print('tsne train', time.time() - start)

    # Left panel: the training embedding on its own
    plt.subplot(121)
    plot(embedding, y_train, show=False, ms=3)

    start = time.time()
    partial_embedding = embedding.transform(x_test, perplexity=20)
    # partial_embedding = embedding.get_partial_embedding_for(
    #     x_test, perplexity=10, initialization='random')
    # partial_embedding.optimize(200, exaggeration=2, inplace=True, momentum=0.1)
    print('tsne transform', time.time() - start)

    # Right panel: faded training embedding with the test points on top
    plt.subplot(122)
    plot(embedding, y_train, show=False, ms=3, alpha=0.25)
    # Restart the property (colour) cycle before drawing the second layer.
    # `set_prop_cycle(None)` replaces `Axes.set_color_cycle(None)`, which has
    # been removed from matplotlib.
    plt.gca().set_prop_cycle(None)
    plot(partial_embedding, y_test, show=False, ms=3, alpha=0.8)

    plt.show()
def test_partial_embedding_optimize(self, param_name, param_value, gradient_descent):
    # type: (str, Any, MagicMock) -> None
    """Keyword arguments given to `optimize` on a partial embedding should
    show up in the single resulting call to the mocked gradient descent."""
    # Keep the mock's return value in the two-element shape callers unpack
    gradient_descent.return_value = (1, MagicMock())

    # The initial fit is covered elsewhere; discard the calls it records
    fitted = TSNE().fit(self.x)
    gradient_descent.reset_mock()

    # `optimize` cannot be called without `n_iter`, so always supply one; the
    # parameter under test is applied afterwards and may replace it.
    optimize_kwargs = dict(n_iter=50)
    optimize_kwargs[param_name] = param_value

    partial = fitted.prepare_partial(self.x_test)
    partial.optimize(**optimize_kwargs, inplace=True)

    self.assertEqual(1, gradient_descent.call_count)
    recorded_call = gradient_descent.mock_calls[0]
    check_call_contains_kwargs(recorded_call, optimize_kwargs)
def test_embedding_transform(self, param_name, param_value, gradient_descent):
    # type: (str, Any, MagicMock) -> None
    """A keyword argument given to `transform` should reach the matching
    call to the mocked gradient descent, under the matching keyword."""
    # Maps a `transform` keyword to (keyword expected in the gradient descent
    # call, index of the call to inspect). Index 0 is the early exaggeration
    # loop and index 1 is the main training loop.
    routing = {
        'early_exaggeration_iter': ('n_iter', 0),
        'early_exaggeration': ('exaggeration', 0),
        'initial_momentum': ('momentum', 0),
        'n_iter': ('n_iter', 1),
        'final_momentum': ('momentum', 1),
    }

    # Keep the mock's return value in the two-element shape callers unpack
    gradient_descent.return_value = (1, MagicMock())

    # The initial fit is covered elsewhere; discard the calls it records
    fitted = TSNE().fit(self.x)
    gradient_descent.reset_mock()

    fitted.transform(self.x_test, **{param_name: param_value})

    # Any other parameter is a general one: it keeps its own name and is
    # checked on the first call.
    expected_kwarg, call_idx = routing.get(param_name, (param_name, 0))

    self.assertEqual(2, gradient_descent.call_count)
    inspected_call = gradient_descent.mock_calls[call_idx]
    check_call_contains_kwargs(inspected_call, {expected_kwarg: param_value})
def run(perplexity=30, learning_rate=100, n_jobs=4, benchmarks=('tsne',)):
    """Time t-SNE implementations on the `get_mouse_60k()` data and plot them.

    Parameters
    ----------
    perplexity, learning_rate, n_jobs
        Settings handed to the implementations being run. Not every
        implementation receives every one of them; see the calls below.
    benchmarks : str or iterable of str
        Which implementations to run, out of `'tsne'`, `'fitsne'`,
        `'mctsne'` and `'sklearn'`. They always run in that order. The
        default runs only `'tsne'`, matching the behaviour before this
        parameter existed, when the other comparisons sat behind an
        unconditional `return`.

    Raises
    ------
    ValueError
        If `benchmarks` contains a name that is not listed above.
    """
    known = ('tsne', 'fitsne', 'mctsne', 'sklearn')
    if isinstance(benchmarks, str):
        benchmarks = (benchmarks,)
    selected = tuple(benchmarks)
    unknown = [name for name in selected if name not in known]
    if unknown:
        raise ValueError(
            'Unknown benchmarks %r; expected a subset of %r' % (unknown, known))

    x, y = get_mouse_60k()
    # x, y = get_fashion_mnist()

    # Settings shared by all implementations so the runs are comparable
    angle = 0.5
    ee = 12
    metric = 'euclidean'

    print(x.shape)

    if 'tsne' in selected:
        start = time.time()
        tsne = TSNE(
            perplexity=perplexity, learning_rate=learning_rate,
            early_exaggeration=ee, n_jobs=n_jobs, theta=angle,
            initialization='random', metric=metric, n_components=2,
            n_iter=750, early_exaggeration_iter=250, neighbors='approx',
            negative_gradient_method='fft', min_num_intervals=10,
            ints_in_interval=1, late_exaggeration_iter=0, late_exaggeration=2.,
            callbacks=ErrorLogger(),
        )
        # x = PCA(n_components=50).fit_transform(x)
        embedding = tsne.fit(x)
        print('-' * 80)
        print('tsne', time.time() - start)
        plt.title('tsne')
        plot(embedding, y)

    # Nothing beyond the default benchmark was requested
    if set(selected) <= {'tsne'}:
        return

    # The remaining implementations are all fed a contiguous float64 copy
    x = np.ascontiguousarray(x.astype(np.float64))

    if 'fitsne' in selected:
        # Imported lazily so the optional dependency is only needed on request
        from fitsne import FItSNE
        start = time.time()
        embedding = FItSNE(
            x, 2, perplexity=perplexity, stop_lying_iter=250,
            ann_not_vptree=True, early_exag_coeff=ee, nthreads=n_jobs,
            theta=angle,
        )
        print('-' * 80)
        print('fft interp %.4f' % (time.time() - start))
        plt.title('fft interp')
        plot(embedding, y)
        plt.show()

    if 'mctsne' in selected:
        # The PCA initialization is computed before the timer starts
        init = PCA(n_components=2).fit_transform(x)
        start = time.time()
        embedding = MulticoreTSNE(
            early_exaggeration=ee, learning_rate=learning_rate,
            perplexity=perplexity, n_jobs=n_jobs, cheat_metric=False,
            angle=angle, init=init, metric=metric, verbose=True,
        ).fit_transform(x)
        print('-' * 80)
        print('mctsne', time.time() - start)
        plt.title('mctsne')
        plot(embedding, y)
        plt.show()

    if 'sklearn' in selected:
        start = time.time()
        embedding = SKLTSNE(
            early_exaggeration=ee, learning_rate=learning_rate, angle=angle,
            perplexity=perplexity, init='pca', metric=metric,
        ).fit_transform(x)
        print('-' * 80)
        print('sklearn', time.time() - start)
        plt.title('sklearn')
        plot(embedding, y)
        plt.show()
def test_fitted_pca_model(self):
    """`transform` with PCA initialization should succeed on an embedding
    that was itself initialized with PCA."""
    pca_embedding = TSNE(initialization='pca').fit(self.x)
    # The test passes as long as this call does not raise
    pca_embedding.transform(self.x_test, initialization='pca')