Example No. 1
def test_vmap_transform_on_iris():
    data = iris.data[iris_selection]
    fitter = VMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(data)

    new_data = iris.data[~iris_selection]
    embedding = fitter.transform(new_data)

    trust = trustworthiness(new_data, embedding, 10)
    assert_greater_equal(
        trust,
        0.89,
        "Insufficiently trustworthy transform for"
        "iris dataset: {}".format(trust),
    )
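The examples on this page reference shared module-level fixtures that are not shown: the VMAP class itself, the iris data and the iris_selection mask, the random nn_data and sparse_nn_data arrays, the trustworthiness metric, and nose-style assert helpers. Below is a minimal sketch of the kind of setup they appear to assume; the VMAP import path, the random seeds, and the exact fixture shapes are assumptions rather than the project's actual test harness. (Recent scikit-learn releases also make n_neighbors a keyword-only argument of trustworthiness, so the positional 10 in these snippets may need to be written as n_neighbors=10.)

# Sketch of the module-level setup these examples appear to assume.
# Import path, seeds, and array shapes are guesses, not VMAP's real test fixtures.
import warnings

import numpy as np
from scipy import sparse
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.manifold import trustworthiness
from sklearn.metrics import adjusted_rand_score
from sklearn.preprocessing import normalize
from numpy.testing import assert_array_equal
from nose.tools import (assert_equal, assert_greater_equal, assert_less,
                        assert_raises)

from vmap import VMAP  # assumed import location

np.random.seed(42)

# Random dense and sparse data used by the parameter-validation examples.
nn_data = np.random.uniform(0, 1, size=(1000, 5))
sparse_nn_data = sparse.random(1000, 50, density=0.5, format="csr", random_state=42)

# Iris plus a boolean mask that splits it into "training" and "new" rows.
iris = datasets.load_iris()
iris_selection = np.random.choice([True, False], iris.data.shape[0], p=[0.75, 0.25])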
Example No. 2
def test_multi_component_layout():
    data, labels = datasets.make_blobs(100,
                                       2,
                                       centers=5,
                                       cluster_std=0.5,
                                       center_box=[-20, 20],
                                       random_state=42)

    true_centroids = np.empty((labels.max() + 1, data.shape[1]),
                              dtype=np.float64)

    for label in range(labels.max() + 1):
        true_centroids[label] = data[labels == label].mean(axis=0)

    true_centroids = normalize(true_centroids, norm="l2")

    embedding = VMAP(n_neighbors=4).fit_transform(data)
    embed_centroids = np.empty((labels.max() + 1, data.shape[1]),
                               dtype=np.float64)
    embed_labels = KMeans(n_clusters=5).fit_predict(embedding)

    for label in range(embed_labels.max() + 1):
        embed_centroids[label] = data[embed_labels == label].mean(axis=0)

    embed_centroids = normalize(embed_centroids, norm="l2")

    error = np.sum((true_centroids - embed_centroids)**2)

    assert_less(error, 15.0, msg="Multi-component embedding too far astray")
Example No. 3
def test_bad_too_large_min_dist():
    u = VMAP(min_dist=2.0)
    # a RuntimeWarning about division by zero in a,b curve fitting is expected
    # caught and ignored for this test
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=RuntimeWarning)
        assert_raises(ValueError, u.fit, nn_data)
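The "a, b curve fitting" mentioned in the comment above is the step where the low-dimensional similarity kernel 1 / (1 + a * d**(2 * b)) is fitted to a target curve that stays at 1 up to min_dist and then decays with rate 1 / spread. Here is a rough sketch of that kind of fit, modelled on UMAP's find_ab_params; the function name, defaults, and exact target curve are assumptions rather than VMAP's actual internals:

import numpy as np
from scipy.optimize import curve_fit

def fit_ab(spread=1.0, min_dist=0.1):
    # Fit a, b so that 1 / (1 + a * d**(2 * b)) tracks a curve that is flat
    # (== 1) below min_dist and decays exponentially with rate 1 / spread.
    # An awkward min_dist / spread combination can make this optimization
    # emit the RuntimeWarning that the test above silences.
    def curve(x, a, b):
        return 1.0 / (1.0 + a * x ** (2 * b))

    xv = np.linspace(0, spread * 3, 300)
    yv = np.zeros_like(xv)
    yv[xv < min_dist] = 1.0
    yv[xv >= min_dist] = np.exp(-(xv[xv >= min_dist] - min_dist) / spread)
    params, _ = curve_fit(curve, xv, yv)
    return params[0], params[1]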
Example No. 4
def test_vmap_transform_embedding_stability():
    """Test that transforming data does not alter the learned embeddings

    Issue #217 describes how using transform to embed new data using a
    trained VMAP transformer causes the fitted embedding matrix to change
    in cases when the new data has the same number of rows as the original
    training data.
    """

    data = iris.data[iris_selection]
    fitter = VMAP(n_neighbors=10, min_dist=0.01, random_state=42).fit(data)
    original_embedding = fitter.embedding_.copy()

    # The important point is that the new data has the same number of rows
    # as the original fit data
    new_data = np.random.random(data.shape)
    embedding = fitter.transform(new_data)

    assert_array_equal(
        original_embedding, fitter.embedding_,
        "Transforming new data changed the original embeddings")

    # Example from issue #217
    a = np.random.random((1000, 10))
    b = np.random.random((1000, 5))

    vmap = VMAP()
    u1 = vmap.fit_transform(a[:, :5])
    u1_orig = u1.copy()
    assert_array_equal(u1_orig, vmap.embedding_)

    u2 = vmap.transform(b)
    assert_array_equal(u1_orig, vmap.embedding_)
Example No. 5
def test_vmap_sparse_trustworthiness():
    embedding = VMAP(n_neighbors=10).fit_transform(sparse_nn_data[:100])
    trust = trustworthiness(sparse_nn_data[:100].toarray(), embedding, 10)
    assert_greater_equal(
        trust,
        0.92,
        "Insufficiently trustworthy embedding for"
        "sparse test dataset: {}".format(trust),
    )
Example No. 6
def test_supervised_vmap_trustworthiness_on_iris():
    data = iris.data
    embedding = VMAP(n_neighbors=10, min_dist=0.01,
                     random_state=42).fit_transform(data, iris.target)
    trust = trustworthiness(iris.data, embedding, 10)
    assert_greater_equal(
        trust,
        0.97,
        "Insufficiently trustworthy embedding for"
        "iris dataset: {}".format(trust),
    )
Example No. 7
def test_vmap_trustworthiness_on_iris_random_init():
    data = iris.data
    embedding = VMAP(n_neighbors=10,
                     min_dist=0.01,
                     random_state=42,
                     init="random").fit_transform(data)
    trust = trustworthiness(iris.data, embedding, 10)
    assert_greater_equal(
        trust,
        0.95,
        "Insufficiently trustworthy embedding for"
        "iris dataset: {}".format(trust),
    )
Example No. 8
def test_negative_nepochs():
    u = VMAP(n_epochs=-2)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 9
def test_bad_matrix_init():
    u = VMAP(init=np.array([[0, 0, 0], [0, 0, 0]]))
    assert_raises(ValueError, u.fit, nn_data)
Example No. 10
def test_bad_numeric_init():
    u = VMAP(init=42)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 11
def test_bad_init():
    u = VMAP(init="foobar")
    assert_raises(ValueError, u.fit, nn_data)
Example No. 12
def test_negative_sample_rate():
    u = VMAP(negative_sample_rate=-1)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 13
def test_negative_op():
    u = VMAP(set_op_mix_ratio=-1.0)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 14
def test_vmap_fit_params():
    # x and y are required to be the same length
    u = VMAP()
    x = np.random.uniform(0, 1, (256, 10))
    y = np.random.randint(10, size=(257, ))
    assert_raises(ValueError, u.fit, x, y)

    u = VMAP()
    x = np.random.uniform(0, 1, (256, 10))
    y = np.random.randint(10, size=(255, ))
    assert_raises(ValueError, u.fit, x, y)

    u = VMAP()
    x = np.random.uniform(0, 1, (256, 10))
    assert_raises(ValueError, u.fit, x, [])

    u = VMAP()
    x = np.random.uniform(0, 1, (256, 10))
    y = np.random.randint(10, size=(256, ))
    res = u.fit(x, y)
    assert isinstance(res, VMAP)

    u = VMAP()
    x = np.random.uniform(0, 1, (256, 10))
    res = u.fit(x)
    assert isinstance(res, VMAP)
Example No. 15
def test_bad_metric():
    u = VMAP(metric=45)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 16
def test_non_integer_ncomponents():
    u = VMAP(n_components=1.5)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 17
def test_negative_ncomponents():
    u = VMAP(n_components=-1)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 18
def test_negative_min_dist():
    u = VMAP(min_dist=-1)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 19
def test_too_large_op():
    u = VMAP(set_op_mix_ratio=1.5)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 20
def test_negative_target_nneighbors():
    u = VMAP(target_n_neighbors=1)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 21
def test_negative_learning_rate():
    u = VMAP(learning_rate=-1.5)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 22
def test_too_many_neighbors_warns():
    u = VMAP(a=1.2, b=1.75, n_neighbors=2000, n_epochs=11, init="random")
    u.fit(nn_data[:100])
    assert_equal(u._a, 1.2)
    assert_equal(u._b, 1.75)
Example No. 23
def test_negative_repulsion():
    u = VMAP(repulsion_strength=-0.5)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 24
def test_too_small_nneighbors():
    u = VMAP(n_neighbors=0.5)
    assert_raises(ValueError, u.fit, nn_data)
Example No. 25
def test_blobs_cluster():
    data, labels = datasets.make_blobs(n_samples=500, n_features=10, centers=5)
    embedding = VMAP().fit_transform(data)
    assert_equal(adjusted_rand_score(labels,
                                     KMeans(5).fit_predict(embedding)), 1.0)