示例#1
0
def test_pairwise_distances_chunked():
    """Exercise ``pairwise_distances_chunked`` across inputs and budgets.

    Covers X-only and X/Y inputs (as arrays and as nested lists), a range of
    ``working_memory`` values from very small to very large, a second named
    metric, the error raised for an unknown metric, and the ``precomputed``
    case.
    """
    random_state = np.random.RandomState(0)
    X = random_state.random_sample((200, 4))

    # With Y=None: a working_memory of 1 first, then progressively larger
    # (but still tiny) budgets 2**-16 ... 2**-1.
    budgets = [1] + [2**power for power in range(-16, 0)]
    for budget in budgets:
        check_pairwise_distances_chunked(
            X, None, working_memory=budget, metric='euclidean')

    # The same data supplied as a plain nested list.
    check_pairwise_distances_chunked(
        X.tolist(), None, working_memory=1, metric='euclidean')

    # A distinct Y, supplied both as arrays and as nested lists.
    Y = random_state.random_sample((100, 4))
    check_pairwise_distances_chunked(
        X, Y, working_memory=1, metric='euclidean')
    check_pairwise_distances_chunked(
        X.tolist(), Y.tolist(), working_memory=1, metric='euclidean')

    # A working_memory far larger than needed.
    check_pairwise_distances_chunked(
        X, Y, working_memory=10000, metric='euclidean')

    # A different metric selected by its string name.
    check_pairwise_distances_chunked(
        X, Y, working_memory=1, metric='cityblock')

    # An unknown metric name must raise ValueError once the generator is
    # advanced.
    unknown_metric_gen = pairwise_distances_chunked(X, Y, metric="blah")
    assert_raises(ValueError, next, unknown_metric_gen)

    # A precomputed matrix is yielded as-is in a single chunk, even with a
    # tiny working_memory; the generator is then exhausted.
    D = pairwise_distances(X)
    precomputed_gen = pairwise_distances_chunked(
        D, working_memory=2**-16, metric='precomputed')
    assert isinstance(precomputed_gen, GeneratorType)
    first_chunk = next(precomputed_gen)
    assert first_chunk is D
    assert_raises(StopIteration, next, precomputed_gen)
示例#2
0
def test_pairwise_distances_chunked_diagonal(metric):
    """Check that chunked self-distances have a zero diagonal.

    ``metric`` is supplied by the caller (presumably a pytest
    parametrization defined outside this block -- confirm).
    """
    random_state = np.random.RandomState(0)
    # Large-magnitude samples (scale=1e10).
    X = random_state.normal(scale=1e10, size=(1000, 10))

    chunk_gen = pairwise_distances_chunked(X, working_memory=1, metric=metric)
    chunks = list(chunk_gen)
    # The check is only meaningful if the output was split into chunks.
    assert len(chunks) > 1

    stacked = np.vstack(chunks)
    diagonal = np.diag(stacked)
    assert_array_almost_equal(diagonal, 0, decimal=10)
示例#3
0
def test_pairwise_distances_chunked_reduce_valid(good_reduce):
    """Check that ``good_reduce`` is accepted as a ``reduce_func``.

    The test passes if the first chunk can be produced without raising.
    """
    # Ten samples with a single feature each.
    X = np.arange(10).reshape((-1, 1))
    reduced_chunks = pairwise_distances_chunked(
        X, None, reduce_func=good_reduce, working_memory=64)
    # Advancing the generator once is enough to invoke the reduce function.
    next(reduced_chunks)
示例#4
0
def test_pairwise_distances_chunked_reduce_invalid(bad_reduce, err_type,
                                                   message):
    """Check that ``bad_reduce`` is rejected as a ``reduce_func``.

    Advancing the generator must raise ``err_type`` with text matching the
    regular expression ``message``.
    """
    # Ten samples with a single feature each.
    X = np.arange(10).reshape((-1, 1))
    reduced_chunks = pairwise_distances_chunked(
        X, None, reduce_func=bad_reduce, working_memory=64)
    # The error surfaces only once the first chunk is requested.
    assert_raises_regexp(err_type, message, next, reduced_chunks)
示例#5
0
def test_pairwise_distances_chunked_reduce():
    """Compare reduced chunked output against the sliced full matrix.

    The expected result is the first 100 columns of the full distance
    matrix; ``_reduce_func`` (defined elsewhere in the file) presumably
    applies the same column slice to each chunk -- confirm.
    """
    random_state = np.random.RandomState(0)
    X = random_state.random_sample((400, 4))

    # Reference: full Euclidean distance matrix, restricted to 100 columns.
    expected = pairwise_distances(X)[:, :100]

    chunk_gen = pairwise_distances_chunked(
        X, None, reduce_func=_reduce_func, working_memory=2**-16)
    assert isinstance(chunk_gen, GeneratorType)

    reduced_chunks = list(chunk_gen)
    # The tiny working_memory must have forced more than one chunk.
    assert len(reduced_chunks) > 1

    # atol is for diagonal where S is explicitly zeroed on the diagonal
    stacked = np.vstack(reduced_chunks)
    assert_allclose(stacked, expected, atol=1e-7)
示例#6
0
def check_pairwise_distances_chunked(X, Y, working_memory, metric='euclidean'):
    """Assert that chunked distances respect the budget and match the total.

    Parameters
    ----------
    X, Y : inputs forwarded to ``pairwise_distances_chunked``; when ``Y`` is
        None, ``X`` is used in its place for the size bound and reference.
    working_memory : numeric budget forwarded to the chunked call; compared
        against block sizes after scaling by 2**20 (i.e. treated as MiB).
    metric : metric name forwarded to both distance functions.
    """
    chunk_gen = pairwise_distances_chunked(
        X, Y, working_memory=working_memory, metric=metric)
    assert isinstance(chunk_gen, GeneratorType)
    chunks = list(chunk_gen)

    if Y is None:
        Y = X
    # Lower bound on a block's size in MiB: len(Y) values of 8 bytes each
    # (presumably one row of float64 distances -- confirm).
    min_block_mib = len(Y) * 8 * 2**-20

    for block in chunks:
        # Each block must fit in the larger of the budget and the minimum
        # block size, converted from MiB to bytes.
        byte_limit = max(working_memory, min_block_mib) * 2**20
        assert block.nbytes <= byte_limit

    # Stacked together, the chunks must reproduce the unchunked result.
    stacked = np.vstack(chunks)
    expected = pairwise_distances(X, Y, metric=metric)
    assert_array_almost_equal(stacked, expected)