def test_densmap_transform_on_iris():
    """Transforming held-out iris rows should yield a trustworthy embedding.

    Fixes the failure message: the adjacent string literals previously
    concatenated to "foriris dataset" (missing space).
    """
    data = iris.data[iris_selection]
    fitter = DENSMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(data)

    # Embed the rows that were NOT used for fitting.
    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)
    trust = trustworthiness(new_data, embedding, 10)
    assert_greater_equal(
        trust,
        0.89,
        "Insufficiently trustworthy transform for "
        "iris dataset: {}".format(trust),
    )
def test_multi_component_layout():
    """Cluster centroids should survive embedding of a multi-component layout.

    Compares unit-normalised centroids of the true blob labels against
    centroids recovered from a KMeans clustering of the embedding.
    Fixes the assertion message typo "to far astray" -> "too far astray".
    """
    data, labels = datasets.make_blobs(
        100, 2, centers=5, cluster_std=0.5, center_box=[-20, 20], random_state=42
    )

    # Centroids of the ground-truth clusters, L2-normalised for comparison.
    true_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)
    for label in range(labels.max() + 1):
        true_centroids[label] = data[labels == label].mean(axis=0)
    true_centroids = normalize(true_centroids, norm="l2")

    embedding = DENSMAP(n_neighbors=4).fit_transform(data)

    # Centroids (in the original space) of the clusters found in the embedding.
    embed_centroids = np.empty((labels.max() + 1, data.shape[1]), dtype=np.float64)
    embed_labels = KMeans(n_clusters=5).fit_predict(embedding)
    for label in range(embed_labels.max() + 1):
        embed_centroids[label] = data[embed_labels == label].mean(axis=0)
    embed_centroids = normalize(embed_centroids, norm="l2")

    error = np.sum((true_centroids - embed_centroids) ** 2)
    assert_less(error, 15.0, msg="Multi component embedding too far astray")
def test_bad_too_large_min_dist():
    """min_dist greater than spread must be rejected with ValueError."""
    model = DENSMAP(min_dist=2.0)
    # The a,b curve fit divides by zero for this min_dist, emitting a
    # RuntimeWarning we deliberately silence — the ValueError is the point.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        assert_raises(ValueError, model.fit, nn_data)
def test_densmap_transform_embedding_stability():
    """Test that transforming data does not alter the learned embeddings

    Issue #217 describes how using transform to embed new data using a
    trained DENSMAP transformer causes the fitting embedding matrix to
    change in cases when the new data has the same number of rows as the
    original training data.
    """
    train = iris.data[iris_selection]
    fitter = DENSMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(train)
    original_embedding = fitter.embedding_.copy()

    # The important point is that the new data has the same number of rows
    # as the original fit data
    same_shape_data = np.random.random(train.shape)
    fitter.transform(same_shape_data)
    assert_array_equal(
        original_embedding,
        fitter.embedding_,
        "Transforming new data changed the original embeddings",
    )

    # Example from issue #217
    a = np.random.random((1000, 10))
    b = np.random.random((1000, 5))

    densmap = DENSMAP()
    u1 = densmap.fit_transform(a[:, :5])
    u1_orig = u1.copy()
    assert_array_equal(u1_orig, densmap.embedding_)

    densmap.transform(b)
    assert_array_equal(u1_orig, densmap.embedding_)
def test_densmap_sparse_trustworthiness():
    """Sparse input should still produce a trustworthy embedding.

    Fixes the failure message: the adjacent string literals previously
    concatenated to "forsparse test dataset" (missing space).
    """
    embedding = DENSMAP(n_neighbors=10).fit_transform(sparse_nn_data[:100])
    trust = trustworthiness(sparse_nn_data[:100].toarray(), embedding, 10)
    assert_greater_equal(
        trust,
        0.92,
        "Insufficiently trustworthy embedding for "
        "sparse test dataset: {}".format(trust),
    )
def test_supervised_densmap_trustworthiness_on_iris():
    """Supervised fit (labels passed as y) should be highly trustworthy.

    Fixes the failure message: the adjacent string literals previously
    concatenated to "foriris dataset" (missing space).
    """
    data = iris.data
    embedding = DENSMAP(
        n_neighbors=10, min_dist=0.01, random_state=42
    ).fit_transform(data, iris.target)
    trust = trustworthiness(iris.data, embedding, 10)
    assert_greater_equal(
        trust,
        0.97,
        "Insufficiently trustworthy embedding for "
        "iris dataset: {}".format(trust),
    )
def test_densmap_trustworthiness_on_iris_random_init():
    """Random (non-spectral) initialisation should still embed iris well.

    Fixes the failure message: the adjacent string literals previously
    concatenated to "foriris dataset" (missing space).
    """
    data = iris.data
    embedding = DENSMAP(
        n_neighbors=10, min_dist=0.01, random_state=42, init="random"
    ).fit_transform(data)
    trust = trustworthiness(iris.data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        "Insufficiently trustworthy embedding for "
        "iris dataset: {}".format(trust),
    )
def test_negative_nepochs():
    """A negative n_epochs must be rejected with ValueError."""
    model = DENSMAP(n_epochs=-2)
    assert_raises(ValueError, model.fit, nn_data)
def test_bad_matrix_init():
    """An init matrix whose shape cannot match the data must raise."""
    bad_init = np.array([[0, 0, 0], [0, 0, 0]])
    model = DENSMAP(init=bad_init)
    assert_raises(ValueError, model.fit, nn_data)
def test_bad_numeric_init():
    """A scalar init value is invalid and must raise ValueError."""
    model = DENSMAP(init=42)
    assert_raises(ValueError, model.fit, nn_data)
def test_bad_init():
    """An unrecognised init string must be rejected with ValueError."""
    model = DENSMAP(init="foobar")
    assert_raises(ValueError, model.fit, nn_data)
def test_negative_sample_rate():
    """A negative negative_sample_rate must raise ValueError on fit."""
    model = DENSMAP(negative_sample_rate=-1)
    assert_raises(ValueError, model.fit, nn_data)
def test_negative_op():
    """set_op_mix_ratio below 0 must raise ValueError on fit."""
    model = DENSMAP(set_op_mix_ratio=-1.0)
    assert_raises(ValueError, model.fit, nn_data)
def test_densmap_fit_params():
    """fit(x, y) must reject mismatched lengths and accept matching ones."""
    # x and y are required to be the same length

    # y one row longer than x -> error
    features = np.random.uniform(0, 1, (256, 10))
    labels = np.random.randint(10, size=(257,))
    assert_raises(ValueError, DENSMAP().fit, features, labels)

    # y one row shorter than x -> error
    features = np.random.uniform(0, 1, (256, 10))
    labels = np.random.randint(10, size=(255,))
    assert_raises(ValueError, DENSMAP().fit, features, labels)

    # empty y -> error
    features = np.random.uniform(0, 1, (256, 10))
    assert_raises(ValueError, DENSMAP().fit, features, [])

    # matching lengths -> fit succeeds and returns the estimator
    features = np.random.uniform(0, 1, (256, 10))
    labels = np.random.randint(10, size=(256,))
    fitted = DENSMAP().fit(features, labels)
    assert isinstance(fitted, DENSMAP)

    # unsupervised fit also returns the estimator
    features = np.random.uniform(0, 1, (256, 10))
    fitted = DENSMAP().fit(features)
    assert isinstance(fitted, DENSMAP)
def test_bad_metric():
    """A non-string, non-callable metric must raise ValueError."""
    model = DENSMAP(metric=45)
    assert_raises(ValueError, model.fit, nn_data)
def test_non_integer_ncomponents():
    """A fractional n_components must raise ValueError on fit."""
    model = DENSMAP(n_components=1.5)
    assert_raises(ValueError, model.fit, nn_data)
def test_negative_ncomponents():
    """A negative n_components must raise ValueError on fit."""
    model = DENSMAP(n_components=-1)
    assert_raises(ValueError, model.fit, nn_data)
def test_negative_min_dist():
    """A negative min_dist must raise ValueError on fit."""
    model = DENSMAP(min_dist=-1)
    assert_raises(ValueError, model.fit, nn_data)
def test_too_large_op():
    """set_op_mix_ratio above 1 must raise ValueError on fit."""
    model = DENSMAP(set_op_mix_ratio=1.5)
    assert_raises(ValueError, model.fit, nn_data)
def test_negative_target_nneighbors():
    """A too-small target_n_neighbors (here 1) must raise ValueError."""
    # NOTE(review): despite the test name, the value used is 1, not a
    # negative number — presumably the validator requires >= 2; confirm.
    model = DENSMAP(target_n_neighbors=1)
    assert_raises(ValueError, model.fit, nn_data)
def test_negative_learning_rate():
    """A negative learning_rate must raise ValueError on fit."""
    model = DENSMAP(learning_rate=-1.5)
    assert_raises(ValueError, model.fit, nn_data)
def test_too_many_neighbors_warns():
    """Fitting with n_neighbors larger than the dataset must still work.

    The user-supplied a/b curve parameters must be preserved through fit.
    """
    model = DENSMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
    model.fit(nn_data[:100, ])
    assert_equal(model._a, 1.2)
    assert_equal(model._b, 1.75)
def test_negative_repulsion():
    """A negative repulsion_strength must raise ValueError on fit."""
    model = DENSMAP(repulsion_strength=-0.5)
    assert_raises(ValueError, model.fit, nn_data)
def test_too_small_nneighbors():
    """A non-integer, too-small n_neighbors must raise ValueError."""
    model = DENSMAP(n_neighbors=0.5)
    assert_raises(ValueError, model.fit, nn_data)
def test_blobs_cluster():
    """Well-separated blobs should be perfectly recoverable from the embedding."""
    data, labels = datasets.make_blobs(n_samples=500, n_features=10, centers=5)
    embedding = DENSMAP().fit_transform(data)
    predicted = KMeans(5).fit_predict(embedding)
    # Perfect agreement between true blob labels and clusters in the embedding.
    assert_equal(adjusted_rand_score(labels, predicted), 1.0)