Example #1
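Checks that with 3 replicas and 2 shards per replica (set via the legacy parallel argument), single-doc search requests are spread across every replica, and every replica fans out to all of its shards.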
def test_normal(docs):
    NUM_REPLICAS = 3
    NUM_SHARDS = 2
    doc_id_path = collections.OrderedDict()

    def handle_search_result(resp):
        for doc in resp.data.docs:
            doc_id_path[int(doc.id)] = (doc.tags['replica'], doc.tags['shard'])

    flow = Flow().add(
        name='pod1',
        uses=DummyMarkExecutor,
        replicas=NUM_REPLICAS,
        parallel=NUM_SHARDS,
    )
    with flow:
        flow.search(inputs=docs, request_size=1, on_done=handle_search_result)

    assert len(doc_id_path.keys()) == len(docs)

    # every replica should have served at least one of the single-doc requests
    num_used_replicas = len(set(map(lambda x: x[0], doc_id_path.values())))
    assert num_used_replicas == NUM_REPLICAS

    shards = collections.defaultdict(list)
    for replica, shard in doc_id_path.values():
        shards[replica].append(shard)

    assert len(shards.keys()) == NUM_REPLICAS

    # each replica must have used every one of its shards
    for shard_list in shards.values():
        assert len(set(shard_list)) == NUM_SHARDS
Example #2
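Runs an index-then-search pipeline over sparse embeddings, using mock callbacks to check that matches come back as scipy.sparse.coo_matrix and that the error callback never fires.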
def test_sparse_pipeline(mocker, docs_to_index):
    def validate(response):
        # the search request below carries a single query document
        assert len(response.data.docs) == 1
        for doc in response.data.docs:
            for i, match in enumerate(doc.matches):
                assert match.id == docs_to_index[i].id
                assert isinstance(match.embedding, sparse.coo_matrix)

    f = Flow().add(uses=DummyCSRSparseIndexEncoder)

    mock = mocker.Mock()
    error_mock = mocker.Mock()

    with f:
        # index without a callback so that `mock` only records the search
        # response; otherwise `mock.assert_called_once()` below would fail
        f.index(inputs=docs_to_index)
        f.search(
            inputs=docs_to_index[0],
            parameters={
                'doc': docs_to_index[0],
                'top_k': 1
            },
            on_done=mock,
            on_error=error_mock,
        )

    mock.assert_called_once()
    validate_callback(mock, validate)
    error_mock.assert_not_called()
Example #3
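The same replica/shard distribution check as Example #1, written against the newer shards argument and recording every (replica, shard) pair seen per document.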
def test_normal(docs):
    NUM_REPLICAS = 3
    NUM_SHARDS = 2
    doc_id_path = collections.OrderedDict()

    def handle_search_result(resp):
        for doc in resp.data.docs:
            if int(doc.id) not in doc_id_path:
                doc_id_path[int(doc.id)] = []
            doc_id_path[int(doc.id)].append((doc.tags['replica'], doc.tags['shard']))

    flow = Flow().add(
        name='executor1',
        uses=DummyMarkExecutor,
        replicas=NUM_REPLICAS,
        shards=NUM_SHARDS,
    )
    with flow:
        flow.search(inputs=docs, request_size=1, on_done=handle_search_result)

    assert len(doc_id_path.keys()) == len(docs)

    replica_shards = [
        tag_item for tag_items in doc_id_path.values() for tag_item in tag_items
    ]
    replicas = [r for r, s in replica_shards]
    shards = [s for r, s in replica_shards]

    assert len(set(replicas)) == NUM_REPLICAS
    assert len(set(shards)) == NUM_SHARDS
Example #4
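Checks that scaling an executor after a rolling update changes which replicas serve search traffic, comparing the observed replica IDs before and after against the expected values.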
def test_scale_after_rolling_update(
    docs, replicas, scale_to, expected_before_scale, expected_after_scale
):
    flow = Flow().add(
        name='executor1',
        uses=DummyMarkExecutor,
        replicas=replicas,
    )
    with flow:
        ret1 = flow.search(docs, return_results=True, request_size=1)
        flow.rolling_update('executor1', None)
        flow.scale('executor1', replicas=scale_to)
        ret2 = flow.search(docs, return_results=True, request_size=1)

    # replica IDs observed before scaling
    replica_ids = set()
    for r in ret1:
        for replica_id in r.docs.get_attributes('tags__replica'):
            replica_ids.add(replica_id)

    assert replica_ids == expected_before_scale

    # replica IDs observed after the rolling update and scale
    replica_ids = set()
    for r in ret2:
        for replica_id in r.docs.get_attributes('tags__replica'):
            replica_ids.add(replica_id)
    assert replica_ids == expected_after_scale
Example #5
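Verifies that rolling_update with uses_with swaps the executor's arguments and dump path in place: responses from before the update carry the old values, responses from after carry the new ones.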
def test_override_uses_with(docs):
    flow = Flow().add(
        name='executor1',
        uses=UpdateExecutor,
        replicas=2,
        parallel=3,
    )
    with flow:
        # test rolling update does not hang
        ret1 = flow.search(docs, return_results=True)
        flow.rolling_update(
            'executor1',
            dump_path='/tmp/dump_path2/',
            uses_with={'argument1': 'version2', 'argument2': 'version2'},
        )
        ret2 = flow.search(docs, return_results=True)

    # responses gathered before the rolling update carry the initial config
    assert len(ret1) > 0
    assert len(ret1[0].docs) > 0
    for doc in ret1[0].docs:
        assert doc.tags['dump_path'] == '/tmp/dump_path1/'
        assert doc.tags['arg1'] == 'version1'
        assert doc.tags['arg2'] == 'version1'

    # responses gathered after the update must reflect the overridden config
    assert len(ret2) > 0
    assert len(ret2[0].docs) > 0
    for doc in ret2[0].docs:
        assert doc.tags['dump_path'] == '/tmp/dump_path2/'
        assert doc.tags['arg1'] == 'version2'
        assert doc.tags['arg2'] == 'version2'
Example #6
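A minimal smoke test that a rolling update between two searches does not hang (legacy parallel argument).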
def test_simple_run(docs):
    flow = Flow().add(
        name='pod1',
        replicas=2,
        parallel=3,
    )
    with flow:
        # test rolling update does not hang
        flow.search(docs)
        flow.rolling_update('pod1', None)
        flow.search(docs)
Example #7
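The same rolling-update smoke test as Example #6, using the newer shards argument.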
def test_simple_run(docs):
    flow = Flow().add(
        name='executor1',
        replicas=2,
        shards=3,
    )
    with flow:
        # test rolling update does not hang
        flow.search(docs)
        flow.rolling_update('executor1', None)
        flow.search(docs)
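Example #8
Checks that uses_with and uses_metas override selected parameters from a YAML config while leaving the remaining defaults untouched, here with parallel=2.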
def test_override_config_params_parallel():
    flow = Flow(return_results=True).add(
        uses=os.path.join(cur_dir, 'default_config.yml'),
        uses_with={'param1': 50, 'param2': 30},
        uses_metas={'workspace': 'different_workspace'},
        parallel=2,
    )
    with flow:
        resps = flow.search(inputs=[Document()], return_results=True)
    doc = resps[0].docs[0]
    assert doc.tags['param1'] == 50
    assert doc.tags['param2'] == 30
    assert doc.tags['param3'] == 10  # not overridden
    assert doc.tags['name'] == 'name'  # not overridden
    assert doc.tags['workspace'] == 'different_workspace'
Example #9
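The same config-override check as Example #8, run against a Docker-based executor with shards=2.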
def test_override_config_params_shards(docker_image):
    flow = Flow(return_results=True).add(
        uses='docker://override-config-test',
        uses_with={'param1': 50, 'param2': 30},
        uses_metas={'workspace': 'different_workspace'},
        shards=2,
    )
    with flow:
        resps = flow.search(inputs=[Document()], return_results=True)
    doc = resps[0].docs[0]
    assert doc.tags['param1'] == 50
    assert doc.tags['param2'] == 30
    assert doc.tags['param3'] == 10  # not overridden
    assert doc.tags['name'] == 'name'  # not overridden
    assert doc.tags['workspace'] == 'different_workspace'
Example #10
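Indexes five randomly embedded documents into a DummyRedisIndexer on a remote host (localhost:8000) with a custom-Dockerfile workspace uploaded, then verifies that a text search returns the exact match with its original embedding shape.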
def test_custom_dockerfile():
    f = Flow().add(
        uses='DummyRedisIndexer',
        py_modules=[os.path.join(cur_dir, 'redis_executor.py')],
        upload_files=[
            os.path.join(cur_dir,
                         '../../daemon/unit/models/good_ws_custom_dockerfile'),
        ],
        host='localhost:8000',
    )
    with f:
        f.index(
            inputs=(
                Document(text=f'{i}', embedding=np.random.rand(2, 3))
                for i in range(5)
            ),
        )
        resp = f.search(inputs=[Document(text='3')], return_results=True)
        assert resp[0].docs[0].matches[0].text == '3'
        assert resp[0].docs[0].matches[0].embedding.shape == (2, 3)
Example #11
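A benchmark helper that downloads the index and query datasets, then times f.index and f.search over a two-executor Flow and returns the elapsed times and queries-per-second figures.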
def _benchmark_qps() -> Dict[str, float]:
    """Benchmark Jina Core Indexing and Query.

    Returns:
        A dict mapping metric names ('index_time', 'query_time',
        'index_qps', 'query_qps') to their measured values.
    """
    args = set_hw_parser().parse_args()
    args.workdir = os.path.join(os.getcwd(), 'original')
    args.num_query = 4096

    targets = {
        'index-labels': {
            'url': args.index_labels_url,
            'filename': os.path.join(args.workdir, 'index-labels'),
        },
        'query-labels': {
            'url': args.query_labels_url,
            'filename': os.path.join(args.workdir, 'query-labels'),
        },
        'index': {
            'url': args.index_data_url,
            'filename': os.path.join(args.workdir, 'index-original'),
        },
        'query': {
            'url': args.query_data_url,
            'filename': os.path.join(args.workdir, 'query-original'),
        },
    }

    # download the data; download_data is expected to also populate
    # targets[...]['data'] with the loaded arrays used below
    Path(args.workdir).mkdir(parents=True, exist_ok=True)
    download_data(targets, args.download_proxy)

    try:
        f = Flow().add(uses=MyEncoder).add(workspace='./', uses=MyIndexer)

        with f:
            # do index
            log.info('Benchmarking index')
            st = time.perf_counter()
            f.index(
                index_generator(num_docs=targets['index']['data'].shape[0],
                                target=targets),
                show_progress=True,
            )
            index_time = time.perf_counter() - st
            log.info(
                'Indexed %d docs within %.2f seconds',
                targets['index']['data'].shape[0],
                index_time,
            )

            # do query
            log.info('Benchmarking query')
            st = time.perf_counter()
            f.search(
                query_generator(num_docs=args.num_query, target=targets),
                shuffle=True,
                parameters={'top_k': args.top_k},
                show_progress=True,
            )
            query_time = time.perf_counter() - st
            log.info('%d queries within %.2f seconds', args.num_query, query_time)

    except Exception as e:
        log.error(e)
        sys.exit(1)

    return {
        'index_time': index_time,
        'query_time': query_time,
        'index_qps': targets['index']['data'].shape[0] / index_time,
        'query_qps': args.num_query / query_time,
    }