def test_pairwise_distances_topk_execution(setup):
    """Compare ``pairwise_distances_topk`` with scikit-learn brute-force kNN.

    Four scenarios are executed: euclidean with indices, euclidean against
    the input itself (distances only), cosine with an explicit
    ``working_memory`` setting, and cosine with ``axis=0``.
    """

    def fitted_reference(metric, train):
        # Every scenario compares against a 3-neighbour brute-force model.
        model = SkNearestNeighbors(n_neighbors=3, algorithm='brute',
                                   metric=metric)
        model.fit(train)
        return model

    rng = np.random.RandomState(0)
    raw_x = rng.rand(20, 5)
    raw_y = rng.rand(21, 5)

    # --- euclidean, x vs. y, distances and indices ---
    x = mt.tensor(raw_x, chunk_size=11)
    y = mt.tensor(raw_y, chunk_size=12)
    dist, idx = pairwise_distances_topk(x, y, 3, metric='euclidean',
                                        return_index=True)
    got_dist, got_idx = fetch(*execute(dist, idx))

    want_dist, want_idx = fitted_reference('euclidean', raw_y).kneighbors(
        raw_x, return_distance=True)
    np.testing.assert_almost_equal(got_dist, want_dist)
    np.testing.assert_array_equal(got_idx, want_idx)

    # --- euclidean, x vs. itself, distances only ---
    x = mt.tensor(raw_x, chunk_size=(11, 3))
    dist = pairwise_distances_topk(x, k=4, metric='euclidean',
                                   return_index=False)
    got_dist = dist.execute().fetch()

    # kneighbors() without a query excludes each point from its own
    # neighbour list, hence k=4 above and dropping the first column below.
    want_dist = fitted_reference('euclidean', raw_x).kneighbors(
        return_distance=True)[0]
    np.testing.assert_almost_equal(got_dist[:, 1:], want_dist)

    # --- cosine, x vs. y, with working_memory given as a string ---
    # presumably small enough to force the computation to be split --
    # TODO confirm against pairwise_distances_topk
    y = mt.tensor(raw_y, chunk_size=21)
    dist, idx = pairwise_distances_topk(x, y, 3, metric='cosine',
                                        return_index=True,
                                        working_memory='168')
    got_dist, got_idx = fetch(*execute(dist, idx))

    want_dist, want_idx = fitted_reference('cosine', raw_y).kneighbors(
        raw_x, return_distance=True)
    np.testing.assert_almost_equal(got_dist, want_dist)
    np.testing.assert_array_equal(got_idx, want_idx)

    # --- cosine, axis=0: reference is raw_y queried against raw_x ---
    dist = pairwise_distances_topk(x, y, 3, metric='cosine', axis=0,
                                   return_index=False)
    got_dist = dist.execute().fetch()

    want_dist = fitted_reference('cosine', raw_x).kneighbors(
        raw_y, return_distance=True)[0]
    np.testing.assert_almost_equal(got_dist, want_dist)
def test_session_async_execute(setup):
    """Exercise the ``wait=False`` variants of execution entry points.

    Each call made with ``wait=False`` returns an object whose ``.result()``
    is then used to obtain the value; the value is compared with the
    NumPy / pandas computation on the same raw data.
    """
    raw_a = np.random.RandomState(0).rand(10, 20)
    a = mt.tensor(raw_a)

    # --- tensor reduction ---
    tensor_total = raw_a.sum()

    value = a.sum().to_numpy(wait=False).result()
    assert tensor_total == value

    value = a.sum().execute(wait=False).result().fetch()
    assert tensor_total == value

    # --- dataframe reduction ---
    raw_df = pd.DataFrame(raw_a)
    column_totals = raw_df.sum()
    df = md.DataFrame(a)

    series = df.sum().to_pandas(wait=False).result()
    pd.testing.assert_series_equal(column_totals, series)

    series = df.sum().execute(wait=False).result().fetch()
    pd.testing.assert_series_equal(column_totals, series)

    # --- mixed tuple of a dataframe reduction and a tensor reduction ---
    pending = [df.sum(), a.sum()]

    values = mt.ExecutableTuple(pending).to_object(wait=False).result()
    pd.testing.assert_series_equal(raw_df.sum(), values[0])
    assert raw_a.sum() == values[1]

    executed = mt.ExecutableTuple(pending).execute(wait=False).result()
    values = fetch(*executed)
    pd.testing.assert_series_equal(raw_df.sum(), values[0])
    assert raw_a.sum() == values[1]
def test_faiss_query(setup, X, Y, metric):
    """Query a 'Flat' faiss index and compare with ``NearestNeighbors``.

    Afterwards a composite index is built from float64 data and queried;
    that second query is only executed, its output is not checked.
    """
    flat_index = build_faiss_index(X, 'Flat', None, metric=metric,
                                   random_state=0)
    dist_t, idx_t = faiss_query(flat_index, Y, 5, nprobe=10)
    distance, indices = fetch(*execute(dist_t, idx_t))

    # NOTE(review): lowercase ``x`` / ``y`` are not parameters of this test;
    # presumably they are module-level raw arrays matching ``X`` / ``Y`` --
    # confirm against the rest of the file.
    reference = NearestNeighbors(metric=metric)
    reference.fit(x)
    expected_distance, expected_indices = reference.kneighbors(y, 5)

    np.testing.assert_array_equal(indices, expected_indices.fetch())
    np.testing.assert_almost_equal(distance, expected_distance.fetch(),
                                   decimal=4)

    # test other index
    X2 = X.astype(np.float64)
    Y2 = y.astype(np.float64)
    composite_index = build_faiss_index(
        X2, 'PCAR6,IVF8_HNSW32,SQ8', 10,
        random_state=0, return_index_type='object')
    dist_t, idx_t = faiss_query(composite_index, Y2, 5, nprobe=10)
    # test execute only
    execute(dist_t, idx_t)
def test_frexp_execution(setup):
    """Check ``frexp`` on dense input, with output tensors, and on sparse input."""
    data1 = np.random.RandomState(0).randint(0, 100, (5, 9, 6))

    # --- dense input: sum of both outputs vs. sum of np.frexp outputs ---
    source = tensor(data1.copy(), chunk_size=4)
    mantissa, exponent = frexp(source)
    combined = (mantissa + exponent).execute().fetch()
    reference = sum(np.frexp(data1))
    np.testing.assert_array_almost_equal(combined, reference)

    # --- results written into caller-supplied tensors ---
    source = tensor(data1.copy(), chunk_size=4)
    mantissa_out = zeros(data1.shape, chunk_size=4)
    exponent_out = zeros(data1.shape, dtype='i8', chunk_size=4)
    frexp(source, mantissa_out, exponent_out)
    mantissa_val, exponent_val = fetch(*execute(mantissa_out, exponent_out))
    # mantissa * 2 ** exponent should give back the original values
    rebuilt = mantissa_val * 2 ** exponent_val
    np.testing.assert_array_almost_equal(rebuilt, data1, decimal=3)

    # --- sparse input ---
    data1 = sps.random(5, 9, density=0.1)
    source = tensor(data1.copy(), chunk_size=4)
    mantissa, exponent = frexp(source)
    combined = (mantissa + exponent).execute().fetch()
    reference = sum(np.frexp(data1.toarray()))
    np.testing.assert_equal(combined.toarray(), reference)