Пример #1
0
def test_faiss_query(setup, X, Y, metric):
    """Check ``faiss_query`` on a ``'Flat'`` index against ``NearestNeighbors``.

    The first half builds a ``'Flat'`` index over ``X``, queries the five
    nearest neighbours of ``Y`` and compares indices (exactly) and distances
    (to four decimals) with a ``NearestNeighbors`` model using the same
    ``metric``.  The second half only verifies that a query against a
    ``'PCAR6,IVF8_HNSW32,SQ8'`` index built from float64 data executes.

    ``setup``, ``X``, ``Y`` and ``metric`` are supplied by the test harness.
    The lowercase ``x`` and ``y`` used below are not parameters of this
    function; they are resolved from the enclosing module scope.
    """
    n_neighbors = 5

    index = build_faiss_index(
        X, 'Flat', None, metric=metric, random_state=0)
    dist_t, ind_t = faiss_query(index, Y, n_neighbors, nprobe=10)
    got_dist, got_ind = fetch(*execute(dist_t, ind_t))

    # Reference answer, fitted on the module-level ``x`` and queried with
    # the module-level ``y``.
    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    want_dist, want_ind = reference.kneighbors(y, n_neighbors)

    # The reference results expose ``.fetch()``, so they are materialised
    # here before comparing.
    np.testing.assert_array_equal(got_ind, want_ind.fetch())
    np.testing.assert_almost_equal(
        got_dist, want_dist.fetch(), decimal=4)

    # Second scenario: a composite index built from float64 inputs.
    # NOTE(review): the query data is derived from module-level ``y`` while
    # the index data is derived from the parameter ``X`` -- confirm that this
    # asymmetry is intended.
    X2 = X.astype(np.float64)
    Y2 = y.astype(np.float64)
    index = build_faiss_index(
        X2,
        'PCAR6,IVF8_HNSW32,SQ8',
        10,
        random_state=0,
        return_index_type='object')
    dist_t, ind_t = faiss_query(index, Y2, n_neighbors, nprobe=10)
    # No value comparison here: it is enough that execution succeeds.
    execute(dist_t, ind_t)
Пример #2
0
def test_pairwise_distances_topk_execution(setup):
    """Check ``pairwise_distances_topk`` against a brute-force reference.

    Four scenarios are exercised on seeded random data of shapes
    ``(20, 5)`` and ``(21, 5)``:

    1. euclidean, ``x`` vs ``y``, distances and indices;
    2. euclidean, ``x`` vs itself with ``k=4``, distances only;
    3. cosine, ``x`` vs ``y`` with ``working_memory='168'``;
    4. cosine with ``axis=0``, distances only.

    Each result is compared with ``SkNearestNeighbors`` configured with
    ``algorithm='brute'`` and three neighbours.
    """
    def make_reference(metric):
        # Brute-force searcher that returns three neighbours per query.
        return SkNearestNeighbors(n_neighbors=3,
                                  algorithm='brute',
                                  metric=metric)

    rng = np.random.RandomState(0)
    data_x = rng.rand(20, 5)
    data_y = rng.rand(21, 5)

    tx = mt.tensor(data_x, chunk_size=11)
    ty = mt.tensor(data_y, chunk_size=12)

    # Scenario 1: euclidean top-3 of x against y, with indices.
    dist, ind = pairwise_distances_topk(
        tx, ty, 3, metric='euclidean', return_index=True)
    got_dist, got_ind = fetch(*execute(dist, ind))
    ref = make_reference('euclidean')
    ref.fit(data_y)
    want_dist, want_ind = ref.kneighbors(data_x, return_distance=True)
    np.testing.assert_almost_equal(got_dist, want_dist)
    np.testing.assert_array_equal(got_ind, want_ind)

    # Scenario 2: x against itself, re-chunked along both axes.
    tx = mt.tensor(data_x, chunk_size=(11, 3))

    dist = pairwise_distances_topk(
        tx, k=4, metric='euclidean', return_index=False)
    got_dist = dist.execute().fetch()
    ref = make_reference('euclidean')
    ref.fit(data_x)
    want_dist = ref.kneighbors(return_distance=True)[0]
    # Four columns were requested but the reference yields three; the first
    # column is skipped (presumably each point's zero distance to itself,
    # which the reference omits when queried without data -- verify).
    np.testing.assert_almost_equal(got_dist[:, 1:], want_dist)

    # Scenario 3: cosine metric, y held in a single chunk, with an explicit
    # working_memory setting.
    ty = mt.tensor(data_y, chunk_size=21)

    dist, ind = pairwise_distances_topk(
        tx,
        ty,
        3,
        metric='cosine',
        return_index=True,
        working_memory='168')
    got_dist, got_ind = fetch(*execute(dist, ind))
    ref = make_reference('cosine')
    ref.fit(data_y)
    want_dist, want_ind = ref.kneighbors(data_x, return_distance=True)
    np.testing.assert_almost_equal(got_dist, want_dist)
    np.testing.assert_array_equal(got_ind, want_ind)

    # Scenario 4: axis=0 -- the reference is fitted on x and queried with y,
    # i.e. the roles are swapped relative to scenario 3.
    dist = pairwise_distances_topk(
        tx, ty, 3, metric='cosine', axis=0, return_index=False)
    got_dist = dist.execute().fetch()
    ref = make_reference('cosine')
    ref.fit(data_x)
    want_dist = ref.kneighbors(data_y, return_distance=True)[0]
    np.testing.assert_almost_equal(got_dist, want_dist)
Пример #3
0
def test_frexp_execution(setup):
    """Check ``frexp`` on dense data, with output tensors, and on sparse data.

    * Dense: the sum of both ``frexp`` outputs must match the sum of both
      ``np.frexp`` outputs.
    * Explicit outputs: after ``frexp(arr, o1, o2)`` the two output tensors
      must recombine as ``o1 * 2 ** o2`` into the input (three decimals).
    * Sparse: same as the dense check, comparing via ``toarray()``.
    """
    chunk = 4

    # --- dense integer input -------------------------------------------
    dense = np.random.RandomState(0).randint(0, 100, (5, 9, 6))

    source = tensor(dense.copy(), chunk_size=chunk)
    first, second = frexp(source)
    combined = first + second

    actual = combined.execute().fetch()
    wanted = sum(np.frexp(dense))
    np.testing.assert_array_almost_equal(actual, wanted)

    # --- results written into caller-provided tensors ------------------
    source = tensor(dense.copy(), chunk_size=chunk)
    first = zeros(dense.shape, chunk_size=chunk)
    second = zeros(dense.shape, dtype='i8', chunk_size=chunk)
    frexp(source, first, second)
    got_first, got_second = fetch(*execute(first, second))

    # Recombining both parts should give back the original values.
    rebuilt = got_first * 2 ** got_second
    np.testing.assert_array_almost_equal(rebuilt, dense, decimal=3)

    # --- sparse input ----------------------------------------------------
    sparse = sps.random(5, 9, density=.1)

    source = tensor(sparse.copy(), chunk_size=chunk)
    first, second = frexp(source)
    combined = first + second

    actual = combined.execute().fetch()
    wanted = sum(np.frexp(sparse.toarray()))
    np.testing.assert_equal(actual.toarray(), wanted)
Пример #4
0
def test_modf_order_execution(setup):
    """Check that ``modf(..., order='F')`` yields Fortran-ordered results.

    Both outputs must match ``np.modf`` called with the same ``order`` and
    must report ``F_CONTIGUOUS`` as true and ``C_CONTIGUOUS`` as false.
    """
    raw = np.random.random((5, 9))
    source = tensor(raw, chunk_size=3)

    out_a, out_b = modf(source, order='F')
    got_a, got_b = execute(out_a, out_b)
    want_a, want_b = np.modf(raw, order='F')

    # Same three checks for each output, first output first.
    for got, want in ((got_a, want_a), (got_b, want_b)):
        np.testing.assert_allclose(got, want)
        assert got.flags['F_CONTIGUOUS'] is True
        assert got.flags['C_CONTIGUOUS'] is False
Пример #5
0
def test_fetch_log(fetch_log_setup):
    """Exercise ``fetch_log`` on spawned functions and DataFrame chunks.

    Scenarios, in order: a single spawned function; several functions read
    with positive, negative and string offsets/sizes; a function that spawns
    another one; a ``map_chunk`` callback on a DataFrame; several spawned
    hosts that each spawn a nested function; and a function that prints
    from a worker thread.
    """
    def f():
        print('test')

    r = mr.spawn(f)
    r.execute()

    # The fetched log, stripped of surrounding whitespace, is exactly what
    # the function printed.
    log = r.fetch_log()
    assert str(log).strip() == 'test'

    # test multiple functions
    def f1(size):
        print('f1' * size)
        # Flush stdout explicitly after printing.
        sys.stdout.flush()

    fs = mr.ExecutableTuple([mr.spawn(f1, 30), mr.spawn(f1, 40)])
    execute(*fs)
    # Read a window of 10 starting at offset 20 from each log; the expected
    # text is the matching slice [20:30] of the printed string.
    log = fetch_log(*fs, offsets=20, sizes=10)
    assert str(log[0]).strip() == ('f1' * 30)[20:30]
    assert str(log[1]).strip() == ('f1' * 40)[20:30]
    assert len(log[0].offsets) > 0
    assert all(s > 0 for s in log[0].offsets)
    assert len(log[1].offsets) > 0
    assert all(s > 0 for s in log[1].offsets)
    assert len(log[0].chunk_op_keys) > 0

    # test negative offsets
    # The expected slice is taken from the printed text plus the trailing
    # newline that ``print`` appends.
    log = fs.fetch_log(offsets=-20, sizes=10)
    assert str(log[0]).strip() == ('f1' * 30 + '\n')[-20:-10]
    assert str(log[1]).strip() == ('f1' * 40 + '\n')[-20:-10]
    assert all(s > 0 for s in log[0].offsets) is True
    assert len(log[1].offsets) > 0
    assert all(s > 0 for s in log[1].offsets) is True
    assert len(log[0].chunk_op_keys) > 0

    # test negative offsets which represented in string
    # The string forms are expected to select the same window as
    # offsets=-20, sizes=10 above (the assertions are identical).
    log = fetch_log(*fs, offsets='-0.02K', sizes='0.01K')
    assert str(log[0]).strip() == ('f1' * 30 + '\n')[-20:-10]
    assert str(log[1]).strip() == ('f1' * 40 + '\n')[-20:-10]
    assert all(s > 0 for s in log[0].offsets) is True
    assert len(log[1].offsets) > 0
    assert all(s > 0 for s in log[1].offsets) is True
    assert len(log[0].chunk_op_keys) > 0

    # A spawned function that itself spawns ``f1`` and prints the inner log:
    # the outer log must contain both its own output and the inner output.
    def test_nested():
        print('level0')
        fr = mr.spawn(f1, 1)
        fr.execute()
        print(fr.fetch_log())

    r = mr.spawn(test_nested)
    r.execute()
    log = str(r.fetch_log())
    assert 'level0' in log
    assert 'f1' in log

    df = md.DataFrame(mt.random.rand(10, 3), chunk_size=5)

    # Callback applied through ``map_chunk``; it prints and returns its
    # argument unchanged.
    def df_func(c):
        print('df func')
        return c

    df2 = df.map_chunk(df_func)
    df2.execute()
    log = df2.fetch_log()
    assert 'Chunk op key:' in str(log)
    assert 'df func' in repr(log)
    # The source DataFrame, which has no printing callback, has an empty log.
    assert len(str(df.fetch_log())) == 0

    # Each host spawns ``nested`` and prints the nested log, so the host's
    # own log is expected to equal the nested output.
    def test_host(rndf):
        rm = mr.spawn(nested, rndf)
        rm.execute()
        print(rm.fetch_log())

    def nested(_rndf):
        print('log_content')

    ds = [
        mr.spawn(test_host, n, retry_when_fail=False)
        for n in np.random.rand(4)
    ]
    xtp = execute(*ds)
    for log in fetch_log(*xtp):
        assert str(log).strip() == 'log_content'

    # Printing from a separate thread: any exception raised inside the
    # thread is captured and re-raised in the calling thread after join().
    def test_threaded():
        import threading

        exc_info = None

        def print_fun():
            nonlocal exc_info
            try:
                print('inner')
            except:  # noqa: E722  # nosec  # pylint: disable=bare-except
                exc_info = sys.exc_info()

        print_thread = threading.Thread(target=print_fun)
        print_thread.start()
        print_thread.join()

        if exc_info is not None:
            raise exc_info[1].with_traceback(exc_info[-1])

        print('after')

    rm = mr.spawn(test_threaded)
    rm.execute()
    # Output from the thread appears before the line printed after join().
    logs = str(rm.fetch_log()).strip()
    assert logs == 'inner\nafter'