def test_wasserstein_vectorizer_lists():
    """``fit_transform`` and a later ``transform`` agree on list inputs.

    Both calls receive ``distributions_data_list`` with
    ``vectors_data_list`` as the vectors; the two outputs must match
    within ``rtol=1e-3`` / ``atol=1e-6``.
    """
    inputs = {"vectors": vectors_data_list}
    model = WassersteinVectorizer(random_state=42)

    fitted = model.fit_transform(distributions_data_list, **inputs)
    transformed = model.transform(distributions_data_list, **inputs)

    assert np.allclose(fitted, transformed, rtol=1e-3, atol=1e-6)
def test_wasserstein_vectorizer_list_based_blockwise():
    """``fit_transform`` and ``transform`` agree for list inputs with
    ``memory_size="50k"``.

    The list inputs are derived from the L1-normalised
    ``distributions_data`` converted to LIL format: one weight array per
    row, and the matching rows of ``vectors_data`` selected by that row's
    column indices.
    """
    row_normalized = normalize(distributions_data, norm="l1").tolil()
    # LIL format keeps per-row value lists (.data) and column-index lists (.rows).
    weights = [np.array(row_values) for row_values in row_normalized.data]
    supports = [vectors_data[row_columns] for row_columns in row_normalized.rows]

    model = WassersteinVectorizer(random_state=42, memory_size="50k")
    inputs = {"vectors": supports}

    fitted = model.fit_transform(weights, **inputs)
    transformed = model.transform(weights, **inputs)

    assert np.allclose(fitted, transformed, rtol=1e-3, atol=1e-6)
def test_wasserstein_vectorizer_generators_blockwise():
    """``fit_transform`` and ``transform`` agree for generator inputs with
    ``memory_size="50k"``.

    Fitting is given an explicit reference distribution and reference
    vectors; both calls are given ``n_distributions`` and ``vector_dim``
    taken from the shapes of ``distributions_data`` and ``vectors_data``.
    """

    def fresh_generators():
        # A generator can only be consumed once, so every call that
        # needs the data gets its own newly created pair.
        return (
            (x for x in distributions_data_list),
            (x for x in vectors_data_list),
        )

    size_hints = {
        "n_distributions": distributions_data.shape[0],
        "vector_dim": vectors_data.shape[1],
    }
    model = WassersteinVectorizer(random_state=42, memory_size="50k")

    dist_gen, vec_gen = fresh_generators()
    fitted = model.fit_transform(
        dist_gen,
        vectors=vec_gen,
        reference_distribution=generator_reference_dist,
        reference_vectors=generator_reference_vectors,
        **size_hints,
    )

    dist_gen, vec_gen = fresh_generators()
    transformed = model.transform(dist_gen, vectors=vec_gen, **size_hints)

    assert np.allclose(fitted, transformed, rtol=1e-3, atol=1e-6)
def test_wasserstein_vectorizer_list_compared_to_sparse():
    """List inputs give the same result as the matrix inputs.

    A first vectorizer is fitted on ``distributions_data`` /
    ``vectors_data`` directly. A second one is fitted on the equivalent
    per-row lists, reusing the first vectorizer's fitted
    ``reference_distribution_`` and ``reference_vectors_``. The two
    ``fit_transform`` outputs must match within tolerance.
    """
    # Baseline: fit on the matrix form of the data.
    baseline_model = WassersteinVectorizer(random_state=42)
    baseline = baseline_model.fit_transform(distributions_data, vectors=vectors_data)

    # Build the list form from the float64, L1-normalised LIL matrix.
    row_normalized = normalize(
        distributions_data.astype(np.float64), norm="l1"
    ).tolil()
    weights = [np.array(row_values) for row_values in row_normalized.data]
    supports = [vectors_data[row_columns] for row_columns in row_normalized.rows]

    list_model = WassersteinVectorizer(random_state=42)
    from_lists = list_model.fit_transform(
        weights,
        vectors=supports,
        reference_distribution=baseline_model.reference_distribution_,
        reference_vectors=baseline_model.reference_vectors_,
    )

    assert np.allclose(baseline, from_lists, rtol=1e-3, atol=1e-6)
def test_wasserstein_vectorizer_generator_compared_to_sparse():
    """Generator inputs give the same result as the matrix inputs.

    A first vectorizer is fitted on ``distributions_data`` /
    ``vectors_data`` directly. A second one is fitted on generators over
    the list fixtures, reusing the first vectorizer's fitted reference
    distribution and vectors and receiving explicit ``n_distributions``
    and ``vector_dim``. The two outputs must match within tolerance.
    """
    dist_gen = (x for x in distributions_data_list)
    vec_gen = (x for x in vectors_data_list)

    # Baseline: fit on the matrix form of the data.
    baseline_model = WassersteinVectorizer(random_state=42)
    baseline = baseline_model.fit_transform(distributions_data, vectors=vectors_data)

    generator_model = WassersteinVectorizer(random_state=42)
    generator_fit_kwargs = {
        "vectors": vec_gen,
        "reference_distribution": baseline_model.reference_distribution_,
        "reference_vectors": baseline_model.reference_vectors_,
        "n_distributions": distributions_data.shape[0],
        "vector_dim": vectors_data.shape[1],
    }
    from_generators = generator_model.fit_transform(dist_gen, **generator_fit_kwargs)

    assert np.allclose(baseline, from_generators, rtol=1e-3, atol=1e-6)
def test_wasserstein_based_vectorizer_bad_params(wasserstein_class):
    """Invalid ``fit`` inputs must raise ``ValueError``.

    The first three cases construct the ``wasserstein_class`` passed in.
    The generator-based cases always construct ``WassersteinVectorizer``,
    regardless of ``wasserstein_class``.
    """

    def fresh_generators():
        # A generator can only be consumed once, so each case builds
        # its own pair.
        return (
            (x for x in distributions_data_list),
            (x for x in vectors_data_list),
        )

    # (data, keyword arguments for fit) exercised on the supplied class.
    class_cases = [
        (distributions_data, {}),  # fit called without vectors
        (mixed_token_data, {"vectors": vectors_data}),
        (point_data, {"vectors": vectors_data}),
    ]
    for data, fit_kwargs in class_cases:
        with pytest.raises(ValueError):
            model = wasserstein_class()
            model.fit(data, **fit_kwargs)

    # Generators with no reference data and no size information.
    dist_gen, vec_gen = fresh_generators()
    with pytest.raises(ValueError):
        model = WassersteinVectorizer()
        model.fit(dist_gen, vectors=vec_gen)

    # Generators with reference vectors only.
    dist_gen, vec_gen = fresh_generators()
    with pytest.raises(ValueError):
        model = WassersteinVectorizer()
        reference = np.random.random((10, vectors_data.shape[1]))
        model.fit(dist_gen, vectors=vec_gen, reference_vectors=reference)

    # Same as above, with reference_size=20 set on the vectorizer while
    # only 10 reference vectors are supplied.
    dist_gen, vec_gen = fresh_generators()
    with pytest.raises(ValueError):
        model = WassersteinVectorizer(reference_size=20)
        reference = np.random.random((10, vectors_data.shape[1]))
        model.fit(dist_gen, vectors=vec_gen, reference_vectors=reference)
def test_wasserstein_vectorizer_blockwise():
    """``fit_transform`` and ``transform`` agree with ``memory_size="50k"``.

    Both calls receive ``distributions_data`` with ``vectors_data`` as
    the vectors; the outputs must match within ``rtol=1e-3`` /
    ``atol=1e-6``.
    """
    inputs = {"vectors": vectors_data}
    model = WassersteinVectorizer(random_state=42, memory_size="50k")

    fitted = model.fit_transform(distributions_data, **inputs)
    transformed = model.transform(distributions_data, **inputs)

    assert np.allclose(fitted, transformed, rtol=1e-3, atol=1e-6)