def test_pairwise_distances_chunked():
    """Exercise ``pairwise_distances_chunked`` across inputs and budgets.

    Most cases are delegated to ``check_pairwise_distances_chunked``; the
    unknown-metric and ``'precomputed'`` cases are asserted directly here.
    """
    random_state = np.random.RandomState(0)
    X = random_state.random_sample((200, 4))

    # X against itself (Y=None), Euclidean metric.
    check_pairwise_distances_chunked(X, None, working_memory=1,
                                     metric='euclidean')

    # Very small memory budgets: 2**-16 up to 2**-1.
    for exponent in range(-16, 0):
        check_pairwise_distances_chunked(X, None,
                                         working_memory=2**exponent,
                                         metric='euclidean')

    # Same data supplied as a nested list instead of an array.
    X_as_list = X.tolist()
    check_pairwise_distances_chunked(X_as_list, None, working_memory=1,
                                     metric='euclidean')

    # Euclidean distance with a Y that differs from X, as arrays and lists.
    Y = random_state.random_sample((100, 4))
    check_pairwise_distances_chunked(X, Y, working_memory=1,
                                     metric='euclidean')
    check_pairwise_distances_chunked(X_as_list, Y.tolist(),
                                     working_memory=1, metric='euclidean')

    # Absurdly large working_memory.
    check_pairwise_distances_chunked(X, Y, working_memory=10000,
                                     metric='euclidean')

    # The string "cityblock" selects the scikit-learn metric, whereas the
    # cityblock function comes from scipy.spatial.
    check_pairwise_distances_chunked(X, Y, working_memory=1,
                                     metric='cityblock')

    # An unknown metric must raise ValueError once the generator is advanced.
    unknown_metric_chunks = pairwise_distances_chunked(X, Y, metric="blah")
    assert_raises(ValueError, next, unknown_metric_chunks)

    # With metric='precomputed' the input matrix itself is yielded, in one
    # piece, even under a tiny memory budget.
    precomputed = pairwise_distances(X)
    chunk_iter = pairwise_distances_chunked(precomputed,
                                            working_memory=2**-16,
                                            metric='precomputed')
    assert isinstance(chunk_iter, GeneratorType)
    assert next(chunk_iter) is precomputed
    assert_raises(StopIteration, next, chunk_iter)
def test_pairwise_distances_chunked_diagonal(metric):
    """Check that chunked self-distances have an (almost) zero diagonal.

    The data is drawn with a very large scale (1e10), and the distances
    must come back in more than one chunk for the check to be meaningful.
    """
    random_state = np.random.RandomState(0)
    X = random_state.normal(size=(1000, 10), scale=1e10)

    chunk_iter = pairwise_distances_chunked(X, working_memory=1,
                                            metric=metric)
    chunks = list(chunk_iter)
    assert len(chunks) > 1

    # Stack the row blocks into the full matrix and inspect its diagonal.
    full_matrix = np.vstack(chunks)
    diagonal = np.diag(full_matrix)
    assert_array_almost_equal(diagonal, 0, decimal=10)
def test_pairwise_distances_chunked_reduce_valid(good_reduce):
    """Advance the chunk generator once with an accepted ``reduce_func``.

    The test passes as long as fetching the first chunk does not raise.
    """
    values = np.arange(10)
    X = values.reshape(-1, 1)  # single-feature column, shape (10, 1)
    reduced_chunks = pairwise_distances_chunked(X, None,
                                                reduce_func=good_reduce,
                                                working_memory=64)
    next(reduced_chunks)
def test_pairwise_distances_chunked_reduce_invalid(bad_reduce, err_type,
                                                   message):
    """Check that a rejected ``reduce_func`` raises the expected error.

    Advancing the generator must raise ``err_type`` with text matching the
    ``message`` pattern.
    """
    values = np.arange(10)
    X = values.reshape(-1, 1)  # single-feature column, shape (10, 1)
    reduced_chunks = pairwise_distances_chunked(X, None,
                                                reduce_func=bad_reduce,
                                                working_memory=64)
    assert_raises_regexp(err_type, message, next, reduced_chunks)
def test_pairwise_distances_chunked_reduce():
    """Compare reduced chunked distances against a slice of the full matrix.

    The stacked output of ``pairwise_distances_chunked`` with
    ``reduce_func=_reduce_func`` must match the first 100 columns of the
    full distance matrix.
    """
    random_state = np.random.RandomState(0)
    X = random_state.random_sample((400, 4))

    # Reference: reduced Euclidean distances, i.e. the first 100 columns.
    # NOTE(review): presumably what `_reduce_func` keeps per chunk; confirm
    # against its definition.
    full_distances = pairwise_distances(X)
    expected = full_distances[:, :100]

    chunk_iter = pairwise_distances_chunked(X, None,
                                            reduce_func=_reduce_func,
                                            working_memory=2**-16)
    assert isinstance(chunk_iter, GeneratorType)

    reduced_chunks = list(chunk_iter)
    assert len(reduced_chunks) > 1

    # atol is for the diagonal, where S is explicitly zeroed.
    stacked = np.vstack(reduced_chunks)
    assert_allclose(stacked, expected, atol=1e-7)
def check_pairwise_distances_chunked(X, Y, working_memory,
                                     metric='euclidean'):
    """Assert chunked distances respect the budget and match the full result.

    Parameters
    ----------
    X, Y : inputs forwarded to ``pairwise_distances_chunked``; when ``Y`` is
        None, ``X`` is used in its place for the size bound and reference.
    working_memory : budget forwarded to ``pairwise_distances_chunked``;
        scaled by 2**20 before being compared with each chunk's ``nbytes``.
    metric : metric forwarded to both distance functions.
    """
    chunk_iter = pairwise_distances_chunked(X, Y,
                                            working_memory=working_memory,
                                            metric=metric)
    assert isinstance(chunk_iter, GeneratorType)
    blocks = list(chunk_iter)

    targets = X if Y is None else Y

    # Floor on the limit: len(targets) values times 8, expressed in MiB.
    # NOTE(review): the 8 is presumably bytes per float64 entry, making this
    # the size of a single output row; confirm.
    min_block_mib = len(targets) * 8 * 2**-20
    byte_limit = max(working_memory, min_block_mib) * 2**20
    for block in blocks:
        assert block.nbytes <= byte_limit

    # Stacked chunks must reproduce the unchunked computation.
    stacked = np.vstack(blocks)
    expected = pairwise_distances(X, targets, metric=metric)
    assert_array_almost_equal(stacked, expected)