def test_faiss_query(setup, X, Y, metric):
    """Check ``faiss_query`` on a 'Flat' index against ``NearestNeighbors``.

    A 'Flat' index is built over ``X`` and queried with ``Y`` for 5
    neighbours; distances and indices are compared with the results of a
    ``NearestNeighbors`` model fitted on ``x`` and queried with ``y``.
    A second, composite index is then built on float64 data and only
    executed, without checking values.
    """
    # NOTE(review): lowercase ``x`` / ``y`` are not parameters of this test;
    # presumably they are module-level raw arrays - confirm.
    n_neighbors = 5

    # NOTE(review): this call does not pass ``return_index_type`` while the
    # second index build below does - confirm this is intended.
    flat_index = build_faiss_index(
        X, 'Flat', None, metric=metric, random_state=0)
    dist_tensor, ind_tensor = faiss_query(
        flat_index, Y, n_neighbors, nprobe=10)
    distance, indices = fetch(*execute(dist_tensor, ind_tensor))

    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    expected_distance, expected_indices = reference.kneighbors(y, n_neighbors)

    np.testing.assert_array_equal(indices, expected_indices.fetch())
    np.testing.assert_almost_equal(
        distance, expected_distance.fetch(), decimal=4)

    # test other index
    X2 = X.astype(np.float64)
    Y2 = y.astype(np.float64)
    other_index = build_faiss_index(
        X2, 'PCAR6,IVF8_HNSW32,SQ8', 10,
        random_state=0, return_index_type='object')
    dist_tensor, ind_tensor = faiss_query(
        other_index, Y2, n_neighbors, nprobe=10)
    # test execute only
    execute(dist_tensor, ind_tensor)
def test_pairwise_distances_topk_execution(setup):
    """Compare ``pairwise_distances_topk`` with brute-force sklearn neighbours.

    Four scenarios are exercised: euclidean with indices, euclidean of ``x``
    against itself (``k=4``), cosine with an explicit ``working_memory``,
    and cosine along ``axis=0`` without indices.
    """

    def fitted_reference(train, metric):
        # Brute-force 3-neighbour sklearn model used as the expected result.
        model = SkNearestNeighbors(
            n_neighbors=3, algorithm='brute', metric=metric)
        model.fit(train)
        return model

    rng = np.random.RandomState(0)
    raw_x = rng.rand(20, 5)
    raw_y = rng.rand(21, 5)

    # euclidean, distances and indices
    x = mt.tensor(raw_x, chunk_size=11)
    y = mt.tensor(raw_y, chunk_size=12)
    dist, ind = pairwise_distances_topk(
        x, y, 3, metric='euclidean', return_index=True)
    result = fetch(*execute(dist, ind))
    expected = fitted_reference(raw_y, 'euclidean').kneighbors(
        raw_x, return_distance=True)
    np.testing.assert_almost_equal(result[0], expected[0])
    np.testing.assert_array_equal(result[1], expected[1])

    # euclidean, x against itself, distances only
    x = mt.tensor(raw_x, chunk_size=(11, 3))
    dist = pairwise_distances_topk(
        x, k=4, metric='euclidean', return_index=False)
    result = dist.execute().fetch()
    expected = fitted_reference(raw_x, 'euclidean').kneighbors(
        return_distance=True)[0]
    # The first of the 4 columns is skipped before comparing - presumably
    # it is each point's zero distance to itself (TODO confirm).
    np.testing.assert_almost_equal(result[:, 1:], expected)

    # cosine with an explicit working_memory setting
    y = mt.tensor(raw_y, chunk_size=21)
    dist, ind = pairwise_distances_topk(
        x, y, 3, metric='cosine', return_index=True, working_memory='168')
    result = fetch(*execute(dist, ind))
    expected = fitted_reference(raw_y, 'cosine').kneighbors(
        raw_x, return_distance=True)
    np.testing.assert_almost_equal(result[0], expected[0])
    np.testing.assert_array_equal(result[1], expected[1])

    # cosine along axis=0, distances only
    dist = pairwise_distances_topk(
        x, y, 3, metric='cosine', axis=0, return_index=False)
    result = dist.execute().fetch()
    expected = fitted_reference(raw_x, 'cosine').kneighbors(
        raw_y, return_distance=True)[0]
    np.testing.assert_almost_equal(result, expected)
def test_frexp_execution(setup):
    """Check ``frexp`` on dense input, with output tensors, and on sparse input."""
    raw = np.random.RandomState(0).randint(0, 100, (5, 9, 6))

    # Dense input: the sum of both outputs must match numpy's.
    out1, out2 = frexp(tensor(raw.copy(), chunk_size=4))
    combined = (out1 + out2).execute().fetch()
    np.testing.assert_array_almost_equal(combined, sum(np.frexp(raw)))

    # Pre-allocated outputs: recombining them must give back the input.
    source = tensor(raw.copy(), chunk_size=4)
    out1 = zeros(raw.shape, chunk_size=4)
    out2 = zeros(raw.shape, dtype='i8', chunk_size=4)
    frexp(source, out1, out2)
    fetched1, fetched2 = fetch(*execute(out1, out2))
    rebuilt = fetched1 * 2 ** fetched2
    np.testing.assert_array_almost_equal(rebuilt, raw, decimal=3)

    # Sparse input.
    sparse_raw = sps.random(5, 9, density=.1)
    out1, out2 = frexp(tensor(sparse_raw.copy(), chunk_size=4))
    combined = (out1 + out2).execute().fetch()
    expected = sum(np.frexp(sparse_raw.toarray()))
    np.testing.assert_equal(combined.toarray(), expected)
def test_modf_order_execution(setup):
    """Check that ``modf(..., order='F')`` matches numpy in values and layout."""
    raw = np.random.random((5, 9))
    source = tensor(raw, chunk_size=3)

    out1, out2 = modf(source, order='F')
    res1, res2 = execute(out1, out2)
    expected1, expected2 = np.modf(raw, order='F')

    # Both outputs must match numpy and be Fortran- but not C-contiguous.
    for got, want in ((res1, expected1), (res2, expected2)):
        np.testing.assert_allclose(got, want)
        assert got.flags['F_CONTIGUOUS'] is True
        assert got.flags['C_CONTIGUOUS'] is False
def test_fetch_log(fetch_log_setup):
    """Exercise log fetching for spawned functions and DataFrame chunks.

    Covers a single spawned function, several functions with positive,
    negative and string-encoded offsets, nested spawns, ``map_chunk`` on a
    DataFrame, spawns launched from within spawned functions, and output
    printed from a thread.
    """

    def f():
        print('test')

    single = mr.spawn(f)
    single.execute()
    assert str(single.fetch_log()).strip() == 'test'

    # test multiple functions
    def f1(size):
        print('f1' * size)
        sys.stdout.flush()

    def check_tail_logs(logs):
        # Assertions shared by both negative-offset scenarios below.
        assert str(logs[0]).strip() == ('f1' * 30 + '\n')[-20:-10]
        assert str(logs[1]).strip() == ('f1' * 40 + '\n')[-20:-10]
        assert all(s > 0 for s in logs[0].offsets) is True
        assert len(logs[1].offsets) > 0
        assert all(s > 0 for s in logs[1].offsets) is True
        assert len(logs[0].chunk_op_keys) > 0

    fs = mr.ExecutableTuple([mr.spawn(f1, 30), mr.spawn(f1, 40)])
    execute(*fs)

    logs = fetch_log(*fs, offsets=20, sizes=10)
    assert str(logs[0]).strip() == ('f1' * 30)[20:30]
    assert str(logs[1]).strip() == ('f1' * 40)[20:30]
    assert len(logs[0].offsets) > 0
    assert all(s > 0 for s in logs[0].offsets)
    assert len(logs[1].offsets) > 0
    assert all(s > 0 for s in logs[1].offsets)
    assert len(logs[0].chunk_op_keys) > 0

    # test negative offsets
    check_tail_logs(fs.fetch_log(offsets=-20, sizes=10))

    # test negative offsets which represented in string
    check_tail_logs(fetch_log(*fs, offsets='-0.02K', sizes='0.01K'))

    def test_nested():
        print('level0')
        fr = mr.spawn(f1, 1)
        fr.execute()
        print(fr.fetch_log())

    nested_run = mr.spawn(test_nested)
    nested_run.execute()
    nested_text = str(nested_run.fetch_log())
    assert 'level0' in nested_text
    assert 'f1' in nested_text

    df = md.DataFrame(mt.random.rand(10, 3), chunk_size=5)

    def df_func(c):
        print('df func')
        return c

    df2 = df.map_chunk(df_func)
    df2.execute()
    chunk_log = df2.fetch_log()
    assert 'Chunk op key:' in str(chunk_log)
    assert 'df func' in repr(chunk_log)

    assert len(str(df.fetch_log())) == 0

    def test_host(rndf):
        rm = mr.spawn(nested, rndf)
        rm.execute()
        print(rm.fetch_log())

    def nested(_rndf):
        print('log_content')

    ds = [
        mr.spawn(test_host, n, retry_when_fail=False)
        for n in np.random.rand(4)
    ]
    xtp = execute(*ds)
    for host_log in fetch_log(*xtp):
        assert str(host_log).strip() == 'log_content'

    def test_threaded():
        import threading

        exc_info = None

        def print_fun():
            nonlocal exc_info
            try:
                print('inner')
            except:  # noqa: E722  # nosec  # pylint: disable=bare-except
                # Captured here and re-raised in the spawning thread below.
                exc_info = sys.exc_info()

        print_thread = threading.Thread(target=print_fun)
        print_thread.start()
        print_thread.join()

        if exc_info is not None:
            raise exc_info[1].with_traceback(exc_info[-1])

        print('after')

    threaded_run = mr.spawn(test_threaded)
    threaded_run.execute()
    threaded_text = str(threaded_run.fetch_log()).strip()
    assert threaded_text == 'inner\nafter'