Пример #1
0
def test_flow_yaml_dump():
    """Check that ``optimize_level`` survives a save/load round trip via YAML.

    A Flow is saved to ``test1.yml`` in the current working directory,
    loaded back, and the two ``args.optimize_level`` values are compared.
    """
    f = Flow(optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY, no_gateway=True)
    f.save_config('test1.yml')

    # The file exists from here on; remove it even when loading or the
    # assertion fails, so a failing run does not leave `test1.yml` behind.
    try:
        fl = Flow.load_config('test1.yml')
        assert f.args.optimize_level == fl.args.optimize_level
    finally:
        rm_files(['test1.yml'])
Пример #2
0
def test_query_multi_modal_text():
    """Post one text Document to ``/search`` on the Flow from ``flows/query.yml``.

    The response is handed to ``assert_result`` through ``on_done``.
    """
    flow = Flow.load_config('flows/query.yml')
    query = Document(
        text='It makes sense to first define what we mean by multimodality before going into more fancy terms.'
    )

    with flow:
        flow.post('/search', inputs=query, on_done=assert_result)
Пример #3
0
def test_query_multi_modal_image():
    """Post the toy image to ``/search`` on the Flow from ``flows/query.yml``.

    Inputs come from ``search_generator``; the response is handed to
    ``assert_result`` through ``on_done``.
    """
    flow = Flow.load_config('flows/query.yml')
    with flow:
        flow.post(
            '/search',
            inputs=search_generator(data_path='toy_data/photo-1.png'),
            read_mode='r',
            on_done=assert_result,
        )
Пример #4
0
def test_flow_with_pod_envs():
    """Start the Flow from ``yaml/flow-with-envs.yml`` and let the executors check their env.

    The two executor classes below assert on ``os.environ`` inside
    ``__init__``; the test body only enters and leaves the Flow context.
    """
    f = Flow.load_config('yaml/flow-with-envs.yml')

    class EnvChecker1(BaseExecutor):
        """Executor referenced from the Flow YAML; expects ``key1``/``key2`` to be set."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # pea/pod-specific variables
            for env_key, env_value in (('key1', 'value1'), ('key2', 'value2')):
                assert os.environ[env_key] == env_value
            # variable inherited from the parent process
            assert os.environ['key_parent'] == 'value3'

    class EnvChecker2(BaseExecutor):
        """Executor referenced from the Flow YAML; expects ``key1``/``key2`` to be absent."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # pea/pod-specific variables must not be visible here
            for env_key in ('key1', 'key2'):
                assert env_key not in os.environ
            # variable inherited from the parent process
            assert os.environ['key_parent'] == 'value3'

    with f:
        pass
Пример #5
0
def test_query_text(tmpdir_factory):
    """Run a text search with ``top_k=5`` and validate the first response."""

    def assert_result(response):
        """Check result counts and that every chunk match belongs to a parent match."""
        results = response.docs
        # one query document that carries two chunks
        assert len(results) == 1
        query_doc = results[0]
        assert len(query_doc.chunks) == 2

        parents = query_doc.matches
        parent_ids = parents.get_attributes('id')
        assert len(parents) > 0

        for chunk in query_doc.chunks:
            chunk_matches = chunk.matches
            assert len(chunk_matches) == 5  # top_k = 5
            # match ids must be unique within a chunk
            ids = chunk_matches.get_attributes('id')
            assert len(ids) == len(set(ids))
            for hit in chunk_matches:
                assert hit.text is not None
                assert hit.location is not None
                assert hit.parent_id in parent_ids
                # the matched text has to be a substring of its parent's text
                owner = parents[parent_ids.index(hit.parent_id)]
                assert hit.text in owner.text

    flow = Flow.load_config('flows/query.yml')
    with flow:
        query = Document(
            content='looked through every window then. hello world.',
            mime_type='text/plain',
        )
        responses = flow.post(
            '/search',
            inputs=query,
            parameters={'top_k': 5},
            return_results=True,
        )
        assert_result(responses[0])
Пример #6
0
def test_flow_identity_override():
    """Check pod identities without, with Flow-level, and with per-pod overrides."""
    # No identity given: as many distinct identities as `num_pods`.
    plain_flow = Flow().add().add(shards=2).add(shards=2)
    with plain_flow:
        identities = {pod.args.identity for _, pod in plain_flow}
        assert len(identities) == plain_flow.num_pods

    # Identity given on the Flow: a single identity across all pods.
    tagged_flow = Flow(identity='123456').add().add(shards=2).add(shards=2)
    with tagged_flow:
        identities = {pod.args.identity for _, pod in tagged_flow}
        assert len(identities) == 1

    y = '''
!Flow
version: '1.0'
executors:
    - name: hello
    - name: world
      shards: 3
    '''

    # Identity set by hand on every pod iterated before the Flow is entered.
    yaml_flow = Flow.load_config(y)
    for _, pod in yaml_flow:
        pod.args.identity = '1234'

    with yaml_flow:
        identities = {pod.args.identity for _, pod in yaml_flow}
        assert len(identities) == 2
        # the only pod with a different identity must be the gateway
        for _, pod in yaml_flow:
            if pod.args.identity != '1234':
                assert pod.name == 'gateway'
Пример #7
0
def main(task, num_docs):
    """Dispatch to the handler selected by ``task``.

    :param task: one of ``index``, ``query``, ``query_text``, ``query_image``,
        ``query_pdf``, ``query_restful`` or ``dryrun``; any other value only
        runs ``config()``
    :param num_docs: upper bound on the number of PDF files passed to ``index``
    """
    config()
    if task == 'index':
        workspace = os.environ['JINA_WORKSPACE']
        # refuse to index on top of an existing workspace
        if os.path.exists(workspace):
            print(
                f'\n +---------------------------------------------------------------------------------+ \
                    \n |                                   ������                                        | \
                    \n | The directory {workspace} already exists. Please remove it before indexing again. | \
                    \n |                                   ������                                        | \
                    \n +---------------------------------------------------------------------------------+'
            )
            sys.exit(1)
        all_pdfs = glob.glob(os.path.join(PDF_DATA_PATH, '*.pdf'))
        index(all_pdfs[:num_docs])
    elif task == 'query':
        query()
    elif task == 'query_text':
        query_text()
    elif task == 'query_image':
        query_image()
    elif task == 'query_pdf':
        query_pdf()
    elif task == 'query_restful':
        rest_flow = Flow.load_config('flows/query-multimodal.yml')
        with rest_flow:
            rest_flow.block()
    elif task == "dryrun":
        dryrun()
Пример #8
0
def test_dump(tmpdir, nr_docs, emb_size, shards):
    """Index documents through the DBMS Flow, dump them, and verify the dump.

    :param tmpdir: base directory for the dump and the ``DBMS_WORKSPACE``
    :param nr_docs: number of documents to generate and index
    :param emb_size: embedding size passed to ``get_documents``
    :param shards: number of shards to dump into and to verify
    """
    docs = list(get_documents(nr=nr_docs, index_start=0, emb_size=emb_size))
    assert len(docs) == nr_docs

    base_dir = str(tmpdir)
    dump_path = os.path.join(base_dir, 'dump_dir')
    os.environ['DBMS_WORKSPACE'] = os.path.join(base_dir, 'index_ws')
    print('DBMS_WORKSPACE ', os.environ['DBMS_WORKSPACE'])

    def stored_indexer_path():
        """Build the path of the persisted indexer inside the current workspace."""
        return os.path.join(os.environ['DBMS_WORKSPACE'], 'psql-0', 'psql.bin')

    with Flow.load_config('flow_dbms.yml') as flow_dbms:
        with TimeContext(f'### indexing {len(docs)} docs'):
            flow_dbms.index(docs)

        with TimeContext(f'### dumping {len(docs)} docs'):
            flow_dbms.dump('indexer_dbms', dump_path, shards=shards, timeout=-1)

        dir_size = path_size(dump_path)
        print(f'### dump path size: {dir_size} MBs')

    with BaseExecutor.load(stored_indexer_path()) as indexer:
        assert indexer.size == nr_docs

    # assert data dumped is correct, one shard at a time
    for shard_id in range(shards):
        assert_dump_data(dump_path, docs, shards, shard_id)

    # required to pass next tests: delete the indexed documents again
    with BaseExecutor.load(stored_indexer_path()) as indexer:
        indexer.delete([doc.id for doc in docs])
Пример #9
0
def test_flow_identity_override():
    """Check pod identities without, with Flow-level, and with per-pod overrides."""
    # No identity given: as many distinct identities as `num_pods`.
    plain_flow = Flow().add().add(parallel=2).add(parallel=2)
    with plain_flow:
        identities = {pod.args.identity for _, pod in plain_flow}
        assert len(identities) == plain_flow.num_pods

    # Identity given on the Flow: a single identity across all pods.
    tagged_flow = Flow(identity='123456').add().add(parallel=2).add(parallel=2)
    with tagged_flow:
        identities = {pod.args.identity for _, pod in tagged_flow}
        assert len(identities) == 1

    y = '''
!Flow
version: '1.0'
pods:
    - uses: _pass
    - uses: _pass
      parallel: 3
    '''

    # Identity set by hand on every pod iterated before the Flow is entered.
    yaml_flow = Flow.load_config(y)
    for _, pod in yaml_flow:
        pod.args.identity = '1234'

    with yaml_flow:
        identities = {pod.args.identity for _, pod in yaml_flow}
        assert len(identities) == 2
        # the only pod with a different identity must be the gateway
        for _, pod in yaml_flow:
            if pod.args.identity != '1234':
                assert pod.name == 'gateway'
Пример #10
0
def test_flow_with_jump(tmpdir):
    """Validate a branching Flow before and after a YAML save/load round trip."""
    # Pods in insertion order; `needs` is only passed where the original chain set it.
    pod_specs = [
        {'name': 'r1'},
        {'name': 'r2'},
        {'name': 'r3', 'needs': 'r1'},
        {'name': 'r4', 'needs': 'r2'},
        {'name': 'r5', 'needs': 'r3'},
        {'name': 'r6', 'needs': 'r4'},
        {'name': 'r8', 'needs': 'r6'},
        {'name': 'r9', 'needs': 'r5'},
        {'name': 'r10', 'needs': ['r9', 'r8']},
    ]
    f = Flow()
    for spec in pod_specs:
        f = f.add(**spec)

    with f:
        _validate_flow(f)

    yaml_path = os.path.join(str(tmpdir), 'tmp.yml')
    f.save_config(yaml_path)
    # loading without starting must work as well
    Flow.load_config(yaml_path)

    with Flow.load_config(yaml_path) as reloaded:
        _validate_flow(reloaded)
Пример #11
0
def flow(request, temp_workspace):
    """Build the conditional Flow either in Python or from ``flow.yml``.

    :param request: object whose ``param`` attribute selects the source;
        ``'python'`` builds the Flow in code, anything else loads the YAML
    :param temp_workspace: not used in the body
    :return: the constructed Flow (not started)
    """
    if request.param != 'python':
        return Flow.load_config(os.path.join(cur_dir, 'flow.yml'))

    pipeline = Flow().add(name='first')
    # branch taken when `tags__type` equals 1
    pipeline = pipeline.add(
        uses=ConditionDumpExecutor,
        uses_metas={'name': 'exec1'},
        workspace=os.environ['TEMP_WORKSPACE'],
        name='exec1',
        needs=['first'],
        when={'tags__type': {'$eq': 1}},
    )
    # branch taken when `tags__type` is greater than 1
    pipeline = pipeline.add(
        uses=ConditionDumpExecutor,
        workspace=os.environ['TEMP_WORKSPACE'],
        uses_metas={'name': 'exec2'},
        name='exec2',
        needs='first',
        when={'tags__type': {'$gt': 1}},
    )
    return pipeline.needs_all('joiner')
Пример #12
0
def query_image():
    """Search with the toy image on the Flow from ``flows/query-only-image.yml``.

    Results are passed to ``get_pdf`` through ``on_done``.
    """
    image_flow = Flow.load_config('flows/query-only-image.yml')
    with image_flow:
        print('image search:')
        image_flow.search(
            input_fn=search_generator(data_path='toy_data/photo-1.png'),
            read_mode='r',
            on_done=get_pdf,
        )
Пример #13
0
def test_flow_identical(tmpdir):
    """Compare YAML-, code- and round-trip-built Flows, then check socket types.

    The Flow loaded from ``test-flow.yml`` must equal the one built in code
    and the one reloaded from a saved copy. After starting it, the socket
    types of the gateway and of each pod's head, peas and tail are asserted.
    """
    with open(os.path.join(cur_dir, '../yaml/test-flow.yml')) as fp:
        a = Flow.load_config(fp)

    b = Flow().add(name='chunk_seg', parallel=3)
    b = b.add(name='wqncode1', parallel=2)
    b = b.add(name='encode2', parallel=2, needs='chunk_seg')
    b = b.join(['wqncode1', 'encode2'])

    saved_path = os.path.join(str(tmpdir), 'test2.yml')
    a.save_config(saved_path)
    c = Flow.load_config(saved_path)

    assert a == b
    assert a == c

    with a as f:
        gateway = f._pod_nodes['gateway']
        assert gateway.head_args.socket_in == SocketType.PULL_CONNECT
        assert gateway.tail_args.socket_out == SocketType.PUSH_CONNECT

        # (pod name, expected head socket_in, expected tail socket_out);
        # the remaining socket types are the same for all three pods
        pod_expectations = (
            ('chunk_seg', SocketType.PULL_BIND, SocketType.PUB_BIND),
            ('wqncode1', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
            ('encode2', SocketType.SUB_CONNECT, SocketType.PUSH_CONNECT),
        )
        for pod_name, head_in, tail_out in pod_expectations:
            node = f._pod_nodes[pod_name]
            assert node.head_args.socket_in == head_in
            assert node.head_args.socket_out == SocketType.ROUTER_BIND
            for pea_args in node.peas_args['peas']:
                assert pea_args.socket_in == SocketType.DEALER_CONNECT
                assert pea_args.socket_out == SocketType.PUSH_CONNECT
            assert node.tail_args.socket_in == SocketType.PULL_BIND
            assert node.tail_args.socket_out == tail_out
Пример #14
0
def test_load_legacy_and_v1():
    """Load Flow YAMLs of several syntax versions; an unknown version must raise."""
    loadable_configs = (
        'yaml/flow-legacy-syntax.yml',
        'yaml/flow-v1-syntax.yml',
        # this should fallback to v1
        'yaml/flow-v1.0-syntax.yml',
    )
    for config_path in loadable_configs:
        Flow.load_config(config_path)

    with pytest.raises(BadFlowYAMLVersion):
        Flow.load_config('yaml/flow-v99-syntax.yml')
Пример #15
0
def test_dump_load_build(monkeypatch):
    """Dump a built Flow with ``JAML.dump`` and compare it with its reloaded copies.

    The comparison is made twice: first as-is, then after setting the
    ``JINA_FULL_CLI`` environment variable to ``'true'``.

    :param monkeypatch: used to set ``JINA_FULL_CLI`` for the second reload
    """
    source: Flow = Flow.load_config('''
    jtype: Flow
    with:
        name: abc
        port_expose: 12345
        protocol: http
    executors:
        - name: executor1
          port_in: 45678
          parallel: 2
        - name: executor2
          uses: docker://exec
          host: 1.2.3.4
        - name: executor3
          uses: docker://exec
          parallel: 2
    ''').build()
    for pod_name in ('gateway', 'executor1'):
        source[pod_name].args.runs_in_docker = True

    reloaded: Flow = Flow.load_config(JAML.dump(source)).build()
    # gateway doesn't have custom args set, as env was not set
    assert not reloaded['gateway'].args.runs_in_docker
    assert reloaded['executor1'].args.runs_in_docker
    # these were passed by the user
    assert source.port_expose == reloaded.port_expose
    assert source.protocol == reloaded.protocol
    assert source['executor1'].args.port_in == reloaded['executor1'].args.port_in
    assert source['executor2'].args.host == reloaded['executor2'].args.host
    # this was set during `load_config`
    assert source['executor2'].args.port_in == reloaded['executor2'].args.port_in
    assert source['executor3'].args.port_out == reloaded['executor3'].args.port_out
    # gateway args are not set, if `JINA_FULL_CLI` is not set
    source_gateway = source['gateway'].args
    reloaded_gateway = reloaded['gateway'].args
    assert source_gateway.port_in != reloaded_gateway.port_in
    assert source_gateway.port_out != reloaded_gateway.port_out

    monkeypatch.setenv('JINA_FULL_CLI', 'true')
    full_cli: Flow = Flow.load_config(JAML.dump(source)).build()
    assert full_cli['gateway'].args.runs_in_docker
    assert full_cli['executor1'].args.runs_in_docker
    # these were passed by the user
    assert source.port_expose == full_cli.port_expose
    # validate gateway args (set during build)
    source_gateway = source['gateway'].args
    full_cli_gateway = full_cli['gateway'].args
    assert source_gateway.port_in == full_cli_gateway.port_in
    assert source_gateway.port_out == full_cli_gateway.port_out
    assert source_gateway.port_ctrl == full_cli_gateway.port_ctrl
Пример #16
0
    def add(
        self,
        args: Namespace,
        port_mapping: Optional[PortMappings] = None,
        envs: Optional[Dict] = None,
        **kwargs,
    ) -> PartialStoreItem:
        """Starts a Flow in `partial-daemon`.

        The Flow is loaded from the YAML file at ``args.uses``, built,
        configured with the partial workspace and environment, and entered.
        Any failure is logged and re-raised.

        :param args: namespace args for the flow
        :param port_mapping: ports to be set
        :param envs: environment variables to be passed into partial flow;
            ``None`` (the default) is treated as no extra variables
        :param kwargs: keyword args
        :return: Item describing the Flow object
        :raises ValueError: if ``args.uses`` is empty or is not an existing file
        """
        # Avoid a shared mutable default, and accept an explicit `None`
        # (allowed by the annotation) instead of failing on `**envs` below.
        envs = {} if envs is None else envs
        try:
            if not args.uses:
                raise ValueError(
                    'uses yaml file was not specified in flow definition')
            elif not Path(args.uses).is_file():
                raise ValueError(f'uses {args.uses} not found in workspace')

            self.object: Flow = Flow.load_config(args.uses).build()
            self.object.workspace_id = jinad_args.workspace_id
            self.object.workspace = __partial_workspace__
            # caller-supplied variables take precedence over the default HOME
            self.object.env = {'HOME': __partial_workspace__, **envs}

            for deployment in self.object._deployment_nodes.values():
                runtime_cls = update_runtime_cls(deployment.args,
                                                 copy=True).runtime_cls
                # ports are only remapped for deployments with more than one replica
                if port_mapping and (hasattr(deployment.args, 'replicas')
                                     and deployment.args.replicas > 1):
                    for pod_args in [deployment.pod_args['head']]:
                        if pod_args.name in port_mapping.pod_names:
                            for port_name in Ports.__fields__:
                                self._set_pod_ports(pod_args, port_mapping,
                                                    port_name)
                    deployment.update_worker_pod_args()

            self.object = self.object.__enter__()
        except Exception as e:
            # close whatever Flow object is attached before propagating the error
            if hasattr(self, 'object'):
                self.object.__exit__(type(e), e, e.__traceback__)
            self._logger.error(f'{e!r}')
            raise
        else:
            with open(args.uses) as yaml_file:
                yaml_source = yaml_file.read()

            self.item = PartialFlowItem(
                arguments={
                    'port_expose': self.object.port_expose,
                    'protocol': self.object.protocol.name,
                    **vars(self.object.args),
                },
                yaml_source=yaml_source,
            )
            self._logger.success('Flow is created successfully!')
            return self.item
Пример #17
0
def main(index_num_docs, evaluate_num_docs, request_size, data_set, model_name,
         evaluation_mode):
    """Optionally index documents, then run the evaluation Flow and print the score.

    :param index_num_docs: number of documents to index; indexing is skipped
        unless this is greater than zero
    :param evaluate_num_docs: number of documents passed to ``evaluation_generator``
    :param request_size: request size used for indexing and evaluation input
    :param data_set: data set handed to the input generators
    :param model_name: model name passed to ``config``
    :param evaluation_mode: ``mode`` argument of ``evaluation_generator``
    """
    config(model_name)

    if index_num_docs > 0:
        with Flow.load_config('flow-index.yml') as index_flow:
            index_flow.use_rest_gateway()
            index_flow.index(
                input_fn=input_index_data(index_num_docs, request_size, data_set),
                request_size=request_size,
            )

    # the query Flow is extended with an evaluator pod before it is started
    eval_flow_def = Flow.load_config('flow-query.yml').add(
        name='evaluator', uses='yaml/evaluate.yml')
    with eval_flow_def as flow_eval:
        flow_eval.search(
            input_fn=evaluation_generator(
                evaluate_num_docs, request_size, data_set, mode=evaluation_mode),
            on_done=print_evaluation_score,
        )

    print(f'MeanReciprocalRank is: {sum_of_score / num_of_searches}')
Пример #18
0
def index(pdf_files):
    """Index the given PDF files with the Flow from ``flows/index.yml``.

    :param pdf_files: sequence of PDF paths handed to ``index_generator``
    """
    index_flow = Flow.load_config('flows/index.yml')
    with index_flow:
        timer = TimeContext(f'QPS: indexing {len(pdf_files)}', logger=index_flow.logger)
        with timer:
            from jina.clients.helper import pprint_routes

            index_flow.index(
                inputs=index_generator(data_path=pdf_files),
                read_mode='r',
                on_done=pprint_routes,
                request_size=1,
            )
Пример #19
0
def index(data_set, num_docs, request_size):
    """Index ``num_docs`` documents of ``data_set`` with the Flow from ``flow-index.yml``.

    :param data_set: data set handed to ``input_index_data``
    :param num_docs: number of documents to index
    :param request_size: request size for the generator and the index call
    """
    index_flow = Flow.load_config('flow-index.yml')
    with index_flow:
        with TimeContext(f'QPS: indexing {num_docs}', logger=index_flow.logger):
            index_flow.index(
                inputs=input_index_data(num_docs, request_size, data_set),
                request_size=request_size,
            )
Пример #20
0
def test_load_flow_from_yaml():
    """Load ``test-flow.yml``, start it, export it as swarm YAML, then delete the export."""
    yaml_dir = cur_dir.parent / 'yaml'
    swarm_path = str(yaml_dir / 'swarm-out.yml')

    with open(yaml_dir / 'test-flow.yml') as flow_fp:
        a = Flow.load_config(flow_fp)
        with a:
            with open(swarm_path, 'w') as swarm_fp:
                a.to_swarm_yaml(swarm_fp)
        # the exported file is only a by-product of the test
        rm_files([swarm_path])
Пример #21
0
def query_pdf():
    """Search with the toy PDF on the Flow from ``flows/query-only-pdf.yml``.

    Results are passed to ``get_pdf`` through ``on_done``.
    """
    pdf_flow = Flow.load_config('flows/query-only-pdf.yml')
    with pdf_flow:
        print('pdf search:')
        pdf_flow.search(
            input_fn=search_generator(data_path='toy_data/blog2-pages-1.pdf'),
            read_mode='r',
            on_done=get_pdf,
        )
Пример #22
0
    def port_expose(self) -> str:
        """
        Determine `port_expose` for the Flow started in `mini-jinad`.

        The Flow config at `self.localpath()` is loaded and its `port_expose`
        is used; when that value is falsy, `random_port()` supplies one.
        NOTE: this port needs to be exposed before starting `mini-jinad`, hence set here.

        :return: port_expose
        """
        loaded_flow = Flow.load_config(str(self.localpath()))
        configured_port = loaded_flow.port_expose
        if configured_port:
            return configured_port
        return random_port()
Пример #23
0
def index(num_docs):
    """Post ``num_docs`` generated documents to ``/index`` on the Flow from ``flows/index.yml``.

    :param num_docs: number of documents requested from ``input_generator``
    """
    flow = Flow.load_config('flows/index.yml')
    with flow:
        input_docs = input_generator(num_docs=num_docs)
        # NOTE(review): if JINA_DATA_FILE is unset, `None` reaches
        # os.path.join below — confirm the variable is always set by callers.
        data_file = os.environ.get('JINA_DATA_FILE')
        data_path = os.path.join(os.path.dirname(__file__), data_file)
        flow.logger.info(f'Indexing {data_path}')
        flow.post(
            on='/index',
            inputs=input_docs,
            request_size=10,
            show_progress=True,
        )
Пример #24
0
def query_text():
    """Search with a fixed sentence on the Flow from ``flows/query-only-text.yml``.

    Results are passed to ``get_pdf`` through ``on_done``.
    """
    text_flow = Flow.load_config('flows/query-only-text.yml')
    with text_flow:
        query_doc = Document()
        # Query sentence from blog1. Alternatives kept from the original:
        #   blog2: 'We all know about CRUD[1]. Every app out there does it.'
        #   blog3: 'Developing a Jina app often means writing YAML configs.'
        query_doc.text = 'It makes sense to first define what we mean by multimodality before going into morefancy terms.'
        print('text search:')
        text_flow.search(input_fn=query_doc, on_done=get_pdf)
Пример #25
0
def test_flow_identical(tmpdir):
    """Compare YAML-, code- and round-trip-built Flows, then validate the started Flow."""
    with open(os.path.join(cur_dir, '../../../yaml/test-flow.yml')) as fp:
        a = Flow.load_config(fp)

    b = Flow().add(name='chunk_seg', shards=3)
    b = b.add(name='wqncode1', shards=2)
    b = b.add(name='encode2', shards=2, needs='chunk_seg')
    b = b.join(['wqncode1', 'encode2'])

    saved_path = os.path.join(str(tmpdir), 'test2.yml')
    a.save_config(saved_path)
    c = Flow.load_config(saved_path)

    assert a == b
    assert a == c

    with a as started:
        _validate_flow(started)
Пример #26
0
def query(num_doc, target: dict):
    """Run a search on the Flow from ``flows/query.yml`` and write the HTML report.

    :param num_doc: number of query documents passed to ``query_generator``
    :param target: second argument of ``query_generator``
    """
    query_flow = Flow.load_config('flows/query.yml')
    with query_flow:
        with TimeContext(f'QPS: query with {num_doc}', logger=query_flow.logger):
            query_flow.search(
                query_generator(num_doc, target),
                shuffle=True,
                size=128,
                on_done=print_result,
                request_size=32,
                top_k=TOP_K,
            )

    report_path = os.path.join(os.getenv('JINA_WORKDIR'), 'hello-world.html')
    write_html(report_path)
Пример #27
0
def test_flow_yaml_dump():
    """Check that Flow args survive a save/load round trip via YAML.

    A Flow is saved to ``test1.yml`` in the current working directory,
    loaded back, and ``logserver_config`` and ``optimize_level`` are compared.
    """
    f = Flow(logserver_config=str(cur_dir.parent / 'yaml' /
                                  'test-server-config.yml'),
             optimize_level=FlowOptimizeLevel.IGNORE_GATEWAY,
             no_gateway=True)
    f.save_config('test1.yml')

    # The file exists from here on; remove it even when loading or an
    # assertion fails, so a failing run does not leave `test1.yml` behind.
    try:
        fl = Flow.load_config('test1.yml')
        assert f.args.logserver_config == fl.args.logserver_config
        assert f.args.optimize_level == fl.args.optimize_level
    finally:
        rm_files(['test1.yml'])
Пример #28
0
def test_add_needs_inspect(tmpdir):
    """A Flow built in code with ``inspect()``/``needs()`` must equal the v1.0 YAML Flow.

    Both Flows are started once and index a random 5x5 array before the
    equality check.
    """
    f1 = Flow().add(name='pod0', needs='gateway')
    f1 = f1.add(name='pod1', needs='gateway')
    f1 = f1.inspect().needs(['pod0', 'pod1'])
    with f1:
        f1.index_ndarray(np.random.random([5, 5]), on_done=print)

    f2 = Flow.load_config('yaml/flow-v1.0-syntax.yml')
    with f2:
        f2.index_ndarray(np.random.random([5, 5]), on_done=print)

    assert f1 == f2
Пример #29
0
def test_add_needs_inspect(tmpdir):
    """A Flow built in code with ``inspect()``/``needs()`` must equal the v1.0 YAML Flow.

    The YAML Flow is loaded and started while the first Flow is still
    running; the equality check happens inside both contexts.
    """
    f1 = Flow().add(name='executor0', needs='gateway')
    f1 = f1.add(name='executor1', needs='gateway')
    f1 = f1.inspect().needs(['executor0', 'executor1'])
    with f1:
        _ = f1.index(from_ndarray(np.random.random([5, 5])))
        f2 = Flow.load_config('yaml/flow-v1.0-syntax.yml')

        with f2:
            _ = f2.index(from_ndarray(np.random.random([5, 5])))

            assert f1 == f2
Пример #30
0
def index(num_docs: int):
    """Post images matching ``IMAGE_SRC`` to ``/index`` on the Flow from ``flows/index.yml``.

    :param num_docs: requested number of images; capped at the number of
        paths that match ``IMAGE_SRC`` under the current working directory
    """
    # Runs indexing for all images, but never asks for more than exist
    pattern = os.path.join(os.getcwd(), IMAGE_SRC)
    available = len(glob(pattern, recursive=True))
    num_docs = min(num_docs, available)

    with Flow.load_config('flows/index.yml') as flow:
        document_generator = from_files(IMAGE_SRC, size=num_docs)
        flow.post(
            on='/index',
            inputs=DocumentArray(document_generator),
            request_size=64,
            read_mode='rb',
        )