def test_dense_vs_sparse_match(match_kwargs, nnz_ratio):
    """Check that matching gives the same top results for sparse and dense embeddings.

    A random ``100 x 256`` matrix is thresholded with ``nnz_ratio`` (entries
    above it are zeroed), then stored once as a COO sparse matrix and once as
    a dense array. Both arrays are matched against themselves with
    ``match_kwargs`` and the texts of the first document's matches are compared.
    """
    num_docs, dim = 100, 256

    raw_embed = np.random.random([num_docs, dim])
    raw_embed[raw_embed > nnz_ratio] = 0

    sparse_da = DocumentArray.empty(num_docs)
    dense_da = DocumentArray.empty(num_docs)

    def _fill_and_match(docs, embeddings):
        # Populate ``docs``, record its total byte size, self-match, and
        # return (size, texts of the matches of the first document).
        docs.embeddings = embeddings
        docs.texts = [str(idx) for idx in range(num_docs)]
        total_bytes = sum(doc.nbytes for doc in docs)
        docs.match(docs, **match_kwargs)
        return total_bytes, [hit.text for hit in docs[0].matches]

    # use sparse embedding
    size_sp, sparse_result = _fill_and_match(sparse_da, sp.coo_matrix(raw_embed))

    # use dense embedding
    size_dense, dense_result = _fill_and_match(dense_da, raw_embed)

    assert sparse_result == dense_result
    print(
        f'sparse DA: {size_sp} bytes is {size_sp / size_dense * 100:.0f}% of dense DA {size_dense} bytes'
    )
def test_zero_embeddings():
    """Round-trip all-zero and near-zero embeddings through a DocumentArray.

    Covers a dense all-zero matrix, a sparse all-zero matrix, and a sparse
    matrix where only values ``<= 0.1`` are kept.
    """
    docs = DocumentArray.empty(10)

    def _assert_sparse_roundtrip(dense_values):
        # Assign the COO form of ``dense_values`` and verify both the stacked
        # embeddings and the per-document shape.
        sparse_values = scipy.sparse.coo_matrix(dense_values)
        docs.embeddings = sparse_values
        np.testing.assert_almost_equal(
            docs.embeddings.todense(), sparse_values.todense()
        )
        for doc in docs:
            # scipy sparse row-vector can only be a (1, m) not squeezible
            assert doc.embedding.shape == (1, 6)

    # all zero, dense
    zeros = np.zeros([10, 6])
    docs.embeddings = zeros
    np.testing.assert_almost_equal(docs.embeddings, zeros)
    for doc in docs:
        assert doc.embedding.shape == (6,)

    # all zero, sparse
    _assert_sparse_roundtrip(zeros)

    # near zero, sparse
    mostly_zero = np.random.random([10, 6])
    mostly_zero[mostly_zero > 0.1] = 0
    _assert_sparse_roundtrip(mostly_zero)
def run():
    """Start a Flow with one executor, post two empty documents and print the returned texts."""
    # The Flow could alternatively be loaded from YAML:
    #   Flow.load_config('flow.yml')
    flow = Flow().add(uses='executor1/config.yml')

    with flow:
        response = flow.post('/', DocumentArray.empty(2))
        print(response.texts)
def test_ravel_embeddings_blobs(ndarray_val, attr, is_sparse):
    """Set ``attr`` on a 10-document array and check the getter returns equal data.

    The value read back must have exactly the same type as ``ndarray_val``.
    Sparse values are densified before the numeric comparison, and anything
    exposing ``.numpy()`` is converted through it.
    """
    docs = DocumentArray.empty(10)
    setattr(docs, attr, ndarray_val)
    fetched = getattr(docs, attr)
    expected = ndarray_val

    # test read/getter
    assert type(fetched) is type(expected)

    if is_sparse:
        # A densified result is wrapped in a 1-tuple so that the later checks
        # in this chain cannot fire on the already-dense object.
        if hasattr(fetched, 'todense'):
            fetched = (fetched.todense(),)
            expected = expected.todense()
        if hasattr(fetched, 'to_dense'):
            fetched = (fetched.to_dense(),)
            expected = expected.to_dense()
        if isinstance(fetched, tf.SparseTensor):
            fetched = tf.sparse.to_dense(fetched)
            expected = tf.sparse.to_dense(expected)
        # Undo the temporary wrapping.
        if isinstance(fetched, tuple):
            fetched = fetched[0]

    if hasattr(fetched, 'numpy'):
        fetched = fetched.numpy()
        expected = expected.numpy()

    np.testing.assert_almost_equal(fetched, expected)
def test_add(da):
    """``+`` must produce a new array holding the documents of both operands."""
    identity_before = id(da)
    extra = DocumentArray.empty(10)

    da = da + extra

    assert len(da) == N + len(extra)
    # The result must be a different object from the left operand.
    identity_after = id(da)
    assert identity_after != identity_before
def test_bsr_coo_unravel(sparse_cls):
    """Per-document sparse embeddings must stack back into the original matrix.

    Each row of a random ``10 x 72`` matrix (values above 0.5 zeroed) is
    converted with ``sparse_cls`` and assigned to one document; the array-level
    ``embeddings`` is then densified and compared with the source matrix.
    """
    dense = np.random.random([10, 72])
    dense[dense > 0.5] = 0

    docs = DocumentArray.empty(10)
    for doc, row in zip(docs, dense):
        doc.embedding = sparse_cls(row)

    np.testing.assert_almost_equal(dense, docs.embeddings.todense())
def test_diff_framework_match(ndarray_val):
    """Self-matching must run without error for the given embedding value."""
    docs = DocumentArray.empty(10)
    docs.embeddings = ndarray_val
    # Only checks that the call completes; no assertion on the matches.
    docs.match(docs)
import os

import pytest

from jina import DocumentArray, Flow

# Start both byte counters at zero before any test in this module runs.
os.environ['JINA_GRPC_SEND_BYTES'] = '0'
os.environ['JINA_GRPC_RECV_BYTES'] = '0'


@pytest.mark.parametrize('inputs', [None, DocumentArray.empty(10)])
def test_grpc_census(inputs):
    """Check that posting through a two-executor Flow updates the gRPC byte counters.

    The counters are read from the ``JINA_GRPC_SEND_BYTES`` and
    ``JINA_GRPC_RECV_BYTES`` environment variables.
    NOTE(review): presumably jina's gRPC layer accumulates into these
    variables while the Flow runs -- confirm against the implementation.

    The variables are always removed on exit, even when an assertion fails, so
    a failing case cannot leak stale counts into the next parametrized case.
    """
    try:
        assert int(os.environ.get('JINA_GRPC_SEND_BYTES', 0)) == 0
        assert int(os.environ.get('JINA_GRPC_RECV_BYTES', 0)) == 0

        with Flow().add().add() as f:
            f.post(
                on='/',
                inputs=inputs,
            )

        sent = int(os.environ['JINA_GRPC_SEND_BYTES'])
        assert sent > 0
        received = int(os.environ['JINA_GRPC_RECV_BYTES'])
        assert received > 0
        # add some route info, so size must be larger
        assert sent < received
    finally:
        # pop() rather than del: a missing key must not mask the real failure.
        os.environ.pop('JINA_GRPC_SEND_BYTES', None)
        os.environ.pop('JINA_GRPC_RECV_BYTES', None)
def da_for_batching():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair, each with 100 empty documents."""
    size = 100
    return DocumentArray.empty(size), DocumentArrayMemmap.empty(size)
def input_da_gen():
    """Yield five empty DocumentArrays of ``INPUT_DA_LEN`` documents, sleeping one second after each."""
    for _ in range(5):
        yield DocumentArray.empty(INPUT_DA_LEN)
        # Runs when the consumer asks for the next item.
        time.sleep(1)
def da_and_dam(N):
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair, each with ``N`` empty documents."""
    return DocumentArray.empty(N), DocumentArrayMemmap.empty(N)
def test_empty_non_zero():
    """``empty(10)`` must create exactly ten documents for both array types."""
    for array_cls in (DocumentArray, DocumentArrayMemmap):
        created = array_cls.empty(10)
        assert len(created) == 10
def test_empty_zero():
    """``empty()`` with no argument must create an array of length zero for both array types."""
    for array_cls in (DocumentArray, DocumentArrayMemmap):
        created = array_cls.empty()
        assert len(created) == 0