Exemplo n.º 1
0
def test_dense_vs_sparse_match(match_kwargs, nnz_ratio):
    """Check that self-matching ranks identically for sparse and dense embeddings.

    A random ``100 x 256`` matrix is thinned by zeroing every entry greater
    than ``nnz_ratio``. The same values are assigned once as a scipy COO
    matrix and once as a plain ndarray; after ``match`` the texts of the
    first Document's matches must be equal in both cases. The summed
    ``nbytes`` of both arrays is printed for comparison.

    :param match_kwargs: keyword arguments forwarded to ``DocumentArray.match``
    :param nnz_ratio: threshold above which random entries are set to zero
    """
    num_docs, dim = 100, 256

    values = np.random.random([num_docs, dim])
    values[values > nnz_ratio] = 0

    sparse_docs = DocumentArray.empty(num_docs)
    dense_docs = DocumentArray.empty(num_docs)

    def _self_match(docs, embeddings):
        # Fill the array, measure its size, match it against itself and
        # return the ranking of the first Document together with the size.
        docs.embeddings = embeddings
        docs.texts = [str(idx) for idx in range(num_docs)]
        total_bytes = sum(doc.nbytes for doc in docs)
        docs.match(docs, **match_kwargs)
        return [hit.text for hit in docs[0].matches], total_bytes

    sparse_result, size_sp = _self_match(sparse_docs, sp.coo_matrix(values))
    dense_result, size_dense = _self_match(dense_docs, values)

    assert sparse_result == dense_result

    print(
        f'sparse DA: {size_sp} bytes is {size_sp / size_dense * 100:.0f}% of dense DA {size_dense} bytes'
    )
Exemplo n.º 2
0
def test_zero_embeddings():
    """Round-trip all-zero and mostly-zero embeddings through a DocumentArray.

    Three cases are checked in turn on the same array: an all-zero dense
    matrix, the same matrix as a scipy COO matrix, and a random matrix in
    which every entry greater than 0.1 has been zeroed (as COO).
    """
    num_docs, dim = 10, 6
    zeros = np.zeros([num_docs, dim])
    docs = DocumentArray.empty(num_docs)

    def _check_sparse_roundtrip(dense_values):
        # Assign as COO, read back, and compare the densified matrices.
        sparse_values = scipy.sparse.coo_matrix(dense_values)
        docs.embeddings = sparse_values
        np.testing.assert_almost_equal(
            docs.embeddings.todense(), sparse_values.todense()
        )
        for doc in docs:
            # a scipy sparse row vector is always 2-D, (1, m); it cannot be squeezed
            assert doc.embedding.shape == (1, dim)

    # all zero, dense
    docs.embeddings = zeros
    np.testing.assert_almost_equal(docs.embeddings, zeros)
    for doc in docs:
        assert doc.embedding.shape == (dim,)

    # all zero, sparse
    _check_sparse_roundtrip(zeros)

    # near zero, sparse
    mostly_zero = np.random.random([num_docs, dim])
    mostly_zero[mostly_zero > 0.1] = 0
    _check_sparse_roundtrip(mostly_zero)
Exemplo n.º 3
0
def run():
    """Start a one-executor Flow, post two empty Documents and print the returned texts."""
    # The Flow could alternatively be loaded from a YAML file:
    #   Flow.load_config('flow.yml')
    flow = Flow().add(uses='executor1/config.yml')
    with flow:
        response = flow.post('/', DocumentArray.empty(2))
        print(response.texts)
Exemplo n.º 4
0
def test_ravel_embeddings_blobs(ndarray_val, attr, is_sparse):
    """Set a bulk array attribute on a DocumentArray and read it back unchanged.

    :param ndarray_val: the array value to assign (dense or sparse, any
        supported framework)
    :param attr: name of the DocumentArray attribute to set and get
    :param is_sparse: whether ``ndarray_val`` is a sparse array that must be
        densified before the numeric comparison
    """
    docs = DocumentArray.empty(10)
    setattr(docs, attr, ndarray_val)

    actual = getattr(docs, attr)
    expected = ndarray_val

    # test read/getter: the getter must hand back the same array type
    assert type(actual) is type(expected)

    if is_sparse:
        # Exactly one densifying strategy applies, chosen in this priority:
        # ``.todense()``, then ``.to_dense()``, then ``tf.sparse.to_dense``.
        if hasattr(actual, 'todense'):
            actual, expected = actual.todense(), expected.todense()
        elif hasattr(actual, 'to_dense'):
            actual, expected = actual.to_dense(), expected.to_dense()
        elif isinstance(actual, tf.SparseTensor):
            actual = tf.sparse.to_dense(actual)
            expected = tf.sparse.to_dense(expected)

    # Framework tensors exposing ``.numpy()`` are converted before comparing.
    if hasattr(actual, 'numpy'):
        actual, expected = actual.numpy(), expected.numpy()

    np.testing.assert_almost_equal(actual, expected)
Exemplo n.º 5
0
def test_add(da):
    """``+`` must return a new, longer array rather than mutate the left operand."""
    original_identity = id(da)
    extra = DocumentArray.empty(10)

    da = da + extra

    assert len(da) == N + len(extra)
    # the sum is a different object from the array we started with
    assert id(da) != original_identity
Exemplo n.º 6
0
def test_bsr_coo_unravel(sparse_cls):
    """Per-Document sparse embeddings must stack back into the original matrix.

    :param sparse_cls: callable that builds a sparse row from a 1-D ndarray
    """
    num_docs = 10
    dense = np.random.random([num_docs, 72])
    # zero out every entry greater than 0.5
    dense[dense > 0.5] = 0

    docs = DocumentArray.empty(num_docs)
    for doc, row in zip(docs, dense):
        doc.embedding = sparse_cls(row)

    stacked = docs.embeddings.todense()
    np.testing.assert_almost_equal(dense, stacked)
Exemplo n.º 7
0
def test_diff_framework_match(ndarray_val):
    """Smoke test: self-matching must not raise for the given embedding type.

    :param ndarray_val: embeddings to assign to a ten-Document array
    """
    docs = DocumentArray.empty(10)
    docs.embeddings = ndarray_val
    # no assertion on the result; the call completing is the check
    docs.match(docs)
Exemplo n.º 8
0
import os

import pytest

from jina import DocumentArray, Flow

# Start both gRPC byte counters at zero when this module is imported;
# test_grpc_census asserts they read 0 before its Flow runs and are
# positive afterwards.
# NOTE(review): presumably Jina accumulates sent/received byte counts into
# these variables while a Flow is serving requests — confirm against Jina.
os.environ['JINA_GRPC_SEND_BYTES'] = '0'
os.environ['JINA_GRPC_RECV_BYTES'] = '0'


@pytest.mark.parametrize('inputs', [None, DocumentArray.empty(10)])
def test_grpc_census(inputs):
    """Check that posting through a two-executor Flow updates the gRPC byte counters.

    The ``JINA_GRPC_SEND_BYTES`` / ``JINA_GRPC_RECV_BYTES`` environment
    variables must read 0 (or be unset) beforehand, be positive afterwards,
    and the received count must exceed the sent count.

    Both variables are removed in a ``finally`` clause so that a failing
    assertion does not leave non-zero counters behind for the next
    parametrized case, whose precondition would then fail as well.

    :param inputs: request inputs passed to ``Flow.post`` (``None`` or a
        DocumentArray)
    """
    send_key = 'JINA_GRPC_SEND_BYTES'
    recv_key = 'JINA_GRPC_RECV_BYTES'
    try:
        assert int(os.environ.get(send_key, 0)) == 0
        assert int(os.environ.get(recv_key, 0)) == 0
        with Flow().add().add() as f:
            f.post(
                on='/',
                inputs=inputs,
            )
        assert int(os.environ[send_key]) > 0
        assert int(os.environ[recv_key]) > 0
        # add some route info, so size must be larger
        assert int(os.environ[send_key]) < int(os.environ[recv_key])
    finally:
        # pop() with a default never raises, so cleanup cannot mask the
        # original failure if a variable was never set.
        os.environ.pop(send_key, None)
        os.environ.pop(recv_key, None)
Exemplo n.º 9
0
def da_for_batching():
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair of 100 empty Documents each."""
    size = 100
    in_memory = DocumentArray.empty(size)
    memmapped = DocumentArrayMemmap.empty(size)
    return in_memory, memmapped
Exemplo n.º 10
0
def input_da_gen():
    """Yield five empty DocumentArrays of length ``INPUT_DA_LEN``.

    The generator sleeps for one second each time it is resumed after a
    yield, including after the final one.
    """
    for _ in range(5):
        batch = DocumentArray.empty(INPUT_DA_LEN)
        yield batch
        time.sleep(1)
Exemplo n.º 11
0
def da_and_dam(N):
    """Return a ``(DocumentArray, DocumentArrayMemmap)`` pair, each holding ``N`` empty Documents.

    :param N: number of empty Documents to create in each array
    """
    return DocumentArray.empty(N), DocumentArrayMemmap.empty(N)
Exemplo n.º 12
0
def test_empty_non_zero():
    """``empty(10)`` must produce an array of length 10 for both array classes."""
    expected_len = 10
    for array_cls in (DocumentArray, DocumentArrayMemmap):
        created = array_cls.empty(expected_len)
        assert len(created) == expected_len
Exemplo n.º 13
0
def test_empty_zero():
    """``empty()`` called without a size must produce a zero-length array for both classes."""
    for array_cls in (DocumentArray, DocumentArrayMemmap):
        created = array_cls.empty()
        assert len(created) == 0