示例#1
0
def test_wasserstein_vectorizer_lists():
    """Check fit_transform and transform agree on list-based input.

    The same list of distributions and list of vectors is passed to
    ``fit_transform`` and then to ``transform`` on the fitted vectorizer;
    the two outputs must match within the given tolerances.
    """
    model = WassersteinVectorizer(random_state=42)
    fitted_output = model.fit_transform(
        distributions_data_list, vectors=vectors_data_list
    )
    transformed_output = model.transform(
        distributions_data_list, vectors=vectors_data_list
    )
    assert np.allclose(fitted_output, transformed_output, rtol=1e-3, atol=1e-6)
示例#2
0
def test_wasserstein_vectorizer_list_based_blockwise():
    """Check fit_transform and transform agree on per-row list input.

    The L1-normalized distributions are converted to LIL format and split
    into one weight array and one matching set of vectors per row. The
    vectorizer is built with ``memory_size="50k"`` (presumably to force the
    block-wise code path, going by the test name -- confirm against the
    library).
    """
    normalized_lil = normalize(distributions_data, norm="l1").tolil()
    # One array of stored values per row of the LIL matrix.
    row_weights = [np.array(row_values) for row_values in normalized_lil.data]
    # Index vectors_data with each row's stored column indices.
    row_vectors = [vectors_data[col_idx] for col_idx in normalized_lil.rows]

    model = WassersteinVectorizer(random_state=42, memory_size="50k")
    fitted_output = model.fit_transform(row_weights, vectors=row_vectors)
    transformed_output = model.transform(row_weights, vectors=row_vectors)
    assert np.allclose(fitted_output, transformed_output, rtol=1e-3, atol=1e-6)
示例#3
0
def test_wasserstein_vectorizer_generators_blockwise():
    """Check fit_transform and transform agree on generator input.

    Distributions and vectors are supplied as generators, together with an
    explicit reference distribution/vectors (fit only) and the expected
    number of distributions and vector dimension (fit and transform).
    ``memory_size="50k"`` is presumably what triggers block-wise processing,
    going by the test name -- confirm against the library.
    """

    def fresh_generators():
        # Generators are single-use, so each pass needs a new pair.
        return (
            (x for x in distributions_data_list),
            (x for x in vectors_data_list),
        )

    # Size hints shared by the fit and the transform call.
    size_hints = dict(
        n_distributions=distributions_data.shape[0],
        vector_dim=vectors_data.shape[1],
    )

    model = WassersteinVectorizer(random_state=42, memory_size="50k")

    dist_gen, vec_gen = fresh_generators()
    fitted_output = model.fit_transform(
        dist_gen,
        vectors=vec_gen,
        reference_distribution=generator_reference_dist,
        reference_vectors=generator_reference_vectors,
        **size_hints,
    )

    dist_gen, vec_gen = fresh_generators()
    transformed_output = model.transform(dist_gen, vectors=vec_gen, **size_hints)

    assert np.allclose(fitted_output, transformed_output, rtol=1e-3, atol=1e-6)
示例#4
0
def test_wasserstein_vectorizer_list_compared_to_sparse():
    """Check list-based input reproduces the sparse-matrix result.

    A vectorizer is first fitted on the sparse ``distributions_data``; its
    learned reference distribution and reference vectors are then handed to
    a second vectorizer fitted on the equivalent per-row list representation.
    Both outputs must match within the given tolerances.
    """
    normalized_lil = normalize(
        distributions_data.astype(np.float64), norm="l1"
    ).tolil()
    # One array of stored values per row of the LIL matrix.
    row_weights = [np.array(row_values) for row_values in normalized_lil.data]
    # Index vectors_data with each row's stored column indices.
    row_vectors = [vectors_data[col_idx] for col_idx in normalized_lil.rows]

    sparse_model = WassersteinVectorizer(random_state=42)
    sparse_output = sparse_model.fit_transform(
        distributions_data, vectors=vectors_data
    )

    # Reuse the reference learned from the sparse fit for the list fit.
    list_model = WassersteinVectorizer(random_state=42)
    list_output = list_model.fit_transform(
        row_weights,
        vectors=row_vectors,
        reference_distribution=sparse_model.reference_distribution_,
        reference_vectors=sparse_model.reference_vectors_,
    )

    assert np.allclose(sparse_output, list_output, rtol=1e-3, atol=1e-6)
示例#5
0
def test_wasserstein_vectorizer_generator_compared_to_sparse():
    """Check generator-based input reproduces the sparse-matrix result.

    A vectorizer is first fitted on the sparse ``distributions_data``; its
    learned reference distribution and reference vectors are then handed to
    a second vectorizer fitted on generator input, along with the expected
    number of distributions and vector dimension. Both outputs must match
    within the given tolerances.
    """
    dist_gen = (x for x in distributions_data_list)
    vec_gen = (x for x in vectors_data_list)

    sparse_model = WassersteinVectorizer(random_state=42)
    sparse_output = sparse_model.fit_transform(
        distributions_data, vectors=vectors_data
    )

    # Reuse the reference learned from the sparse fit for the generator fit.
    generator_model = WassersteinVectorizer(random_state=42)
    generator_output = generator_model.fit_transform(
        dist_gen,
        vectors=vec_gen,
        reference_distribution=sparse_model.reference_distribution_,
        reference_vectors=sparse_model.reference_vectors_,
        n_distributions=distributions_data.shape[0],
        vector_dim=vectors_data.shape[1],
    )

    assert np.allclose(sparse_output, generator_output, rtol=1e-3, atol=1e-6)
示例#6
0
def test_wasserstein_based_vectorizer_bad_params(wasserstein_class):
    """Check that invalid ``fit`` inputs are rejected with ``ValueError``.

    Parameters
    ----------
    wasserstein_class
        The vectorizer class under test for the first three cases
        (presumably supplied by a parametrize decorator or fixture that is
        not visible here).

    Notes
    -----
    The generator-based cases instantiate ``WassersteinVectorizer`` directly
    rather than ``wasserstein_class``. NOTE(review): confirm this is
    intentional (e.g. generator input only supported by that class).
    """
    # Fitting without any ``vectors`` argument.
    with pytest.raises(ValueError):
        wasserstein_class().fit(distributions_data)

    # Fitting on mixed_token_data.
    with pytest.raises(ValueError):
        wasserstein_class().fit(mixed_token_data, vectors=vectors_data)

    # Fitting on point_data.
    with pytest.raises(ValueError):
        wasserstein_class().fit(point_data, vectors=vectors_data)

    # Generator input with no reference or size information supplied.
    dist_gen = (x for x in distributions_data_list)
    vec_gen = (x for x in vectors_data_list)
    with pytest.raises(ValueError):
        WassersteinVectorizer().fit(dist_gen, vectors=vec_gen)

    # Generator input with reference vectors only.
    dist_gen = (x for x in distributions_data_list)
    vec_gen = (x for x in vectors_data_list)
    with pytest.raises(ValueError):
        model = WassersteinVectorizer()
        random_reference = np.random.random((10, vectors_data.shape[1]))
        model.fit(dist_gen, vectors=vec_gen, reference_vectors=random_reference)

    # Same as above, but with reference_size=20 against 10 reference vectors.
    dist_gen = (x for x in distributions_data_list)
    vec_gen = (x for x in vectors_data_list)
    with pytest.raises(ValueError):
        model = WassersteinVectorizer(reference_size=20)
        random_reference = np.random.random((10, vectors_data.shape[1]))
        model.fit(dist_gen, vectors=vec_gen, reference_vectors=random_reference)
示例#7
0
def test_wasserstein_vectorizer_blockwise():
    """Check fit_transform and transform agree with ``memory_size="50k"``.

    The memory limit is presumably what triggers block-wise processing,
    going by the test name -- confirm against the library. The same
    ``distributions_data`` and ``vectors_data`` are used for both calls.
    """
    model = WassersteinVectorizer(random_state=42, memory_size="50k")
    fitted_output = model.fit_transform(distributions_data, vectors=vectors_data)
    transformed_output = model.transform(distributions_data, vectors=vectors_data)
    assert np.allclose(fitted_output, transformed_output, rtol=1e-3, atol=1e-6)