Example No. 1
        async def _compute_response():
            response_msg = copy.deepcopy(request)
            exec_endpoint = request.header.exec_endpoint
            new_docs = DocumentArray()
            await asyncio.sleep(0.1)
            if deployment == 'indexer-executor':
                if exec_endpoint == '/index':
                    time.sleep(0.1)
                    self._docs.extend(request.docs)
                else:
                    docs = response_msg.docs
                    docs.clear()
                    docs.extend(
                        DocumentArray(
                            Document(tags={'ids': self._docs[:, 'id']})))
                    response_msg.data.docs = docs
                return response_msg
            else:
                if deployment == 'slow-executor':
                    await asyncio.sleep(SLOW_EXECUTOR_SLEEP_TIME)
                for doc in request.docs:
                    new_doc = Document(doc, copy=True)
                    new_doc.tags['executor'] = time.time()
                    print(
                        f'in {deployment}, {new_doc.id} => time: {readable_time_from(new_doc.tags["executor"])}, {new_doc.tags["executor"]}',
                        flush=True,
                    )
                    new_docs.append(new_doc)

                docs = response_msg.docs
                docs.clear()
                docs.extend(new_docs)
                response_msg.data.docs = docs
                return response_msg
Example No. 2
def _yield_data_request():
    req = DataRequest()
    req.header.request_id = random_identity()
    da = DocumentArray()
    da.append(Document())
    req.data.docs = da
    return req
Example No. 3
    def send_requests_once(
        self,
        requests: List[Request],
        deployment: str,
        head: bool,
        endpoint: str = None,
        timeout: float = 1.0,
    ) -> asyncio.Task:
        assert head
        self.deployments_called.append(deployment)
        response_msg = copy.deepcopy(requests[0])
        new_docs = DocumentArray()
        for doc in requests[0].docs:
            clientid = doc.text[0:7]
            self.sent_msg[clientid][deployment] = doc.text
            new_doc = Document(
                text=doc.text + f'-{clientid}-{deployment}', tags=doc.tags
            )
            new_docs.append(new_doc)
            self.responded_messages[clientid][deployment] = new_doc.text

        response_msg.data.docs = new_docs

        async def task_wrapper():
            import random

            await asyncio.sleep(1 / (random.randint(1, 3) * 10))
            return response_msg, {}

        return asyncio.create_task(task_wrapper())
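As context for the mock above: the caller is expected to await the returned asyncio.Task and unpack a (response, metadata) pair. A minimal, self-contained sketch of that pattern (the payload and the jittered delay are made up for illustration, not taken from the test):

import asyncio
import random


async def _task_body(payload):
    # simulate the small random delay the mock introduces before responding
    await asyncio.sleep(1 / (random.randint(1, 3) * 10))
    return payload, {}


async def main():
    task = asyncio.create_task(_task_body('response'))
    response, metadata = await task
    print(response, metadata)


asyncio.run(main())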
Example No. 4
    def debug(self, docs_matrix: List[DocumentArray], **kwargs):
        self.logger.debug(
            f'Received doc matrix in exec-merger with length {len(docs_matrix)}.'
        )

        result = DocumentArray()
        for docs in zip(*docs_matrix):
            traversed_executors = [
                doc.tags['traversed-executors'] for doc in docs
            ]
            shard_ids = [doc.tags['shard_id'] for doc in docs]
            pea_ids = [doc.tags['pea_id'] for doc in docs]
            shards = [doc.tags['shards'] for doc in docs]
            parallels = [doc.tags['parallel'] for doc in docs]
            traversed_executors = list(chain(*traversed_executors))
            doc = Document()
            doc.tags['traversed-executors'] = traversed_executors
            doc.tags['shard_id'] = shard_ids
            doc.tags['pea_id'] = pea_ids
            doc.tags['shards'] = shards
            doc.tags['parallel'] = parallels

            result.append(doc)

        return result
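For orientation, the docs_matrix such a merger receives is one DocumentArray per upstream shard, aligned by position, and zip(*docs_matrix) walks the corresponding Documents together. A minimal sketch of that shape (the tag values below are invented for illustration):

from itertools import chain

from jina import Document, DocumentArray

shard_0 = DocumentArray([Document(tags={'traversed-executors': ['enc-0'], 'shard_id': 0})])
shard_1 = DocumentArray([Document(tags={'traversed-executors': ['enc-1'], 'shard_id': 1})])
docs_matrix = [shard_0, shard_1]

# each iteration yields one Document per shard for the same position
for docs in zip(*docs_matrix):
    traversed = list(chain(*(d.tags['traversed-executors'] for d in docs)))
    print(traversed)  # ['enc-0', 'enc-1']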
Example No. 5
    def rank(self, docs_matrix: List['DocumentArray'], parameters: Dict,
             **kwargs) -> 'DocumentArray':
        """
        :param docs: the doc which gets bubbled up matches
        :param kwargs: not used (kept to maintain interface)
        """

        result_da = DocumentArray()  # length: 1 as every time there is only one query
        for d_mod1, d_mod2 in zip(*docs_matrix):

            final_matches = {}  # type: Dict[str, Document]

            for m in d_mod1.matches:
                m.score.value *= d_mod1.weight
                final_matches[m.parent_id] = Document(m, copy=True)

            for m in d_mod2.matches:
                if m.parent_id in final_matches:
                    final_matches[m.parent_id].score.value += (m.score.value *
                                                               d_mod2.weight)
                else:
                    m.score.value *= d_mod2.weight
                    final_matches[m.parent_id] = Document(m, copy=True)

            da = DocumentArray(list(final_matches.values()))
            da.sort(key=lambda ma: ma.score.value, reverse=True)
            d = Document(matches=da[:int(parameters['top_k'])])
            result_da.append(d)
        return result_da
Example No. 6
    def send_requests_once(
        self,
        requests,
        deployment: str,
        head: bool,
        endpoint: str = None,
        timeout: float = 1.0,
    ) -> asyncio.Task:
        assert head
        request = requests[0]
        response_msg = copy.deepcopy(request)
        new_docs = DocumentArray()
        docs = request.docs
        for doc in docs:
            clientid = doc.text[0:7]
            new_doc = Document(id=doc.id,
                               text=doc.text + f'-{clientid}-{deployment}')
            new_docs.append(new_doc)

        response_msg.data.docs = new_docs

        async def task_wrapper():
            import random

            await asyncio.sleep(1 / (random.randint(1, 3) * 10))
            return response_msg, {}

        return asyncio.create_task(task_wrapper())
Example No. 7
def _get_sync_requests_iterator(num_requests):
    for i in range(num_requests):
        req = DataRequest()
        req.header.request_id = random_identity()
        da = DocumentArray()
        da.append(Document())
        req.data.docs = da
        yield req
Example No. 8
    def join_reduce(self, docs_matrix: List[DocumentArray], parameters, **kwargs):
        final_docs = DocumentArray()
        for doc_arr in docs_matrix:
            if not doc_arr:
                continue
            for doc in doc_arr:
                final_docs.append(doc)
        return final_docs
Example No. 9
def encoder_doc_array_for_search(encoder_doc_array, tmpdir):
    create_test_img(path=str(tmpdir), file_name='1.png')
    da = DocumentArray()
    for doc in encoder_doc_array:
        for chunk in doc.chunks:
            if chunk.mime_type == 'image/jpeg':
                chunk.convert_uri_to_datauri()
        da.append(doc)
    return da
Example No. 10
def test_union(docarray, document_factory):
    additional_docarray = DocumentArray([])
    for idx in range(4, 10):
        doc = document_factory.create(idx, f'test {idx}')
        additional_docarray.append(doc)
    union = docarray + additional_docarray
    for idx in range(0, 3):
        assert union[idx].id == docarray[idx].id
    for idx in range(0, 6):
        assert union[idx + 3].id == additional_docarray[idx].id
Example No. 11
def test_document_save_load(method, tmp_path):
    da1 = DocumentArray(random_docs(1000))
    da2 = DocumentArray()
    for doc in random_docs(10):
        da2.append(doc)
    for da in [da1, da2]:
        tmp_file = os.path.join(tmp_path, 'test')
        with TimeContext(f'w/{method}'):
            da.save(tmp_file, file_format=method)
        with TimeContext(f'r/{method}'):
            da_r = DocumentArray.load(tmp_file, file_format=method)
        assert len(da) == len(da_r)
        for d, d_r in zip(da, da_r):
            assert d.id == d_r.id
            np.testing.assert_equal(d.embedding, d_r.embedding)
            assert d.content == d_r.content
Example No. 12
    def debug(self, docs_matrix: List[DocumentArray], parameters: Dict,
              **kwargs):
        self.logger.debug(
            f'Received doc matrix in exec-merger with length {len(docs_matrix)}.'
        )

        result = DocumentArray()
        for docs in zip(*docs_matrix):
            traversed_executors = [
                doc.tags['traversed-executors'] for doc in docs
            ]
            traversed_executors = list(chain(*traversed_executors))
            doc = Document()
            doc.tags['traversed-executors'] = traversed_executors
            result.append(doc)

        return result
Example No. 13
    def rank(self, docs_matrix: List[DocumentArray], parameters: Dict, **kwargs):
        result = DocumentArray()
        docs_matrix = [doc_arr for doc_arr in docs_matrix if doc_arr is not None and len(doc_arr) > 0]

        for single_doc_per_modality in zip(*docs_matrix):
            final_matches = {}
            for doc in single_doc_per_modality:
                for m in doc.matches:
                    if m.tags['root_doc_id'] in final_matches:
                        final_matches[m.tags['root_doc_id']].score.value += m.score.value
                    else:
                        final_matches[m.tags['root_doc_id']] = Document(id=m.tags['root_doc_id'], score=m.score)
            da = DocumentArray(list(final_matches.values()))
            da.sort(key=lambda ma: ma.score.value, reverse=True)
            d = Document(matches=da[: int(parameters.get('top_k', 3))])
            result.append(d)
        return result
Example No. 14
def docarray_for_nest_split():
    da = DocumentArray()
    da.append(Document(tags={'nest': {'category': 'c'}}))
    da.append(Document(tags={'nest': {'category': 'c'}}))
    da.append(Document(tags={'nest': {'category': 'b'}}))
    da.append(Document(tags={'nest': {'category': 'a'}}))
    da.append(Document(tags={'nest': {'category': 'a'}}))
    return da
Example No. 15
def docarray_for_split_at_zero():
    da = DocumentArray()
    da.append(Document(tags={'category': 0.0}))
    da.append(Document(tags={'category': 0.0}))
    da.append(Document(tags={'category': 1.0}))
    da.append(Document(tags={'category': 2.0}))
    da.append(Document(tags={'category': 2.0}))
    return da
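These fixtures appear to feed split-by-tag tests. As a rough sketch of what splitting on the 'category' tag amounts to, written in plain Python rather than any specific DocumentArray helper (whose name varies across versions):

from collections import defaultdict

from jina import Document, DocumentArray

da = DocumentArray()
for cat in (0.0, 0.0, 1.0, 2.0, 2.0):
    da.append(Document(tags={'category': cat}))

# group the documents by their 'category' tag value
groups = defaultdict(DocumentArray)
for doc in da:
    groups[doc.tags['category']].append(doc)

assert sorted(len(g) for g in groups.values()) == [1, 2, 2]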
Example No. 16
    def rank(self, docs_matrix: List['DocumentArray'], parameters: Dict,
             **kwargs) -> 'DocumentArray':
        """
        :param docs_matrix: list of :class:`DocumentArray` on multiple requests to
          get bubbled up matches.
        :param parameters: the parameters passed into the ranker, in this case stores :attr:`top_k`
          to filter k results based on score.
        :param kwargs: not used (kept to maintain interface)
        """

        result_da = DocumentArray()  # length: 1 as every time there is only one query
        for d_mod1, d_mod2 in zip(*docs_matrix):

            final_matches = {}  # type: Dict[str, Document]

            for m in d_mod1.matches:
                m.scores['relevance'] = m.scores['cosine'].value * d_mod1.weight
                final_matches[m.parent_id] = Document(m, copy=True)

            for m in d_mod2.matches:
                if m.parent_id in final_matches:
                    final_matches[m.parent_id].scores['relevance'] = (
                        final_matches[m.parent_id].scores['relevance'].value
                        + m.scores['cosine'].value * d_mod2.weight
                    )
                else:
                    m.scores['relevance'] = m.scores['cosine'].value * d_mod2.weight
                    final_matches[m.parent_id] = Document(m, copy=True)

            da = DocumentArray(list(final_matches.values()))
            da.sort(key=lambda ma: ma.scores['relevance'].value, reverse=True)
            d = Document(matches=da[:int(parameters['top_k'])])
            result_da.append(d)
        return result_da
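As a worked illustration of the weighted fusion above, for a match that appears under both modalities (the weights and cosine values are invented):

# hypothetical numbers for one parent document matched by both modalities
w1, w2 = 0.6, 0.4        # d_mod1.weight and d_mod2.weight
cos1, cos2 = 0.90, 0.70  # m.scores['cosine'].value in each modality

relevance = cos1 * w1 + cos2 * w2  # 0.54 + 0.28 = 0.82, the score used for sorting
print(relevance)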
Example No. 17
def docarray_for_split_at_zero():
    da = DocumentArray()
    da.append(Document(tags={'category': 0.0}))
    da.append(Document(tags={'category': 0.0}))
    da.append(Document(tags={'category': 1.0}))
    da.append(Document(tags={'category': 2.0}))
    da.append(Document(tags={'category': 2.0}))
    dam = DocumentArrayMemmap()
    dam.extend(da)
    return da, dam
Example No. 18
def docarray_for_split():
    da = DocumentArray()
    da.append(Document(tags={'category': 'c'}))
    da.append(Document(tags={'category': 'c'}))
    da.append(Document(tags={'category': 'b'}))
    da.append(Document(tags={'category': 'a'}))
    da.append(Document(tags={'category': 'a'}))
    dam = DocumentArrayMemmap()
    dam.extend(da)
    return da, dam
Example No. 19
    def foo(self, docs: DocumentArray, **kwargs):
        docs.append(Document(text=str(self.shard_id)))
        return docs
Example No. 20
def create_req_from_text(text: str):
    req = DataRequest()
    da = DocumentArray()
    da.append(Document(text=text, tags={'key': 4}))
    req.data.docs = da
    return req
Example No. 21
    def index(self, docs: 'DocumentArray', **kwargs) -> DocumentArray:
        embedding_docs = DocumentArray()
        for doc in docs:
            embedding_docs.append(Document(id=doc.id, embedding=doc.embedding))
        self._docs.extend(embedding_docs)
        return docs
Example No. 22
def test_content_hash():
    d0 = Document(content='a')
    assert d0.content

    empty_doc = Document()
    assert not empty_doc.content
    assert empty_doc.content_hash

    # warning: a Doc with empty content will have a hash -- it hashes ''
    assert empty_doc.content_hash != d0.content_hash

    d1 = Document(content='text')
    init_content_hash = d1.content_hash
    assert init_content_hash
    assert init_content_hash == d1.content_hash

    d2 = Document(content='text')
    assert init_content_hash == d2.content_hash

    d3 = Document(content='text1')
    assert init_content_hash != d3.content_hash

    d4 = Document(id='a')
    d5 = Document(id='b')
    assert d5.content_hash == d4.content_hash

    d6 = Document(d2.proto)
    assert d6.content_hash == d2.content_hash

    d7 = Document(d2)
    assert d6.content_hash == d2.content_hash == d7.content_hash

    # test hash image
    d8 = Document(blob=np.array([1, 3, 5]))
    d9 = Document(blob=np.array([2, 4, 6]))
    d10 = Document(blob=np.array([1, 3, 5]))
    assert d8.content_hash != d9.content_hash
    assert d8.content_hash == d10.content_hash

    # test hash buffer
    d11 = Document(content=b'buffer1')
    d12 = Document(content=b'buffer2')
    d13 = Document(content=b'buffer1')
    assert d11.content_hash != d12.content_hash
    assert d11.content_hash == d13.content_hash

    # document with more fields
    d14 = Document(uri='http://test1.com',
                   tags={'key1': 'value1'},
                   granularity=2,
                   adjacency=2)
    d15 = Document(uri='http://test2.com',
                   tags={'key1': 'value2'},
                   granularity=3,
                   adjacency=2)
    d16 = Document(uri='http://test2.com',
                   tags={'key1': 'value2'},
                   granularity=3,
                   adjacency=2)
    assert d14.content_hash != d15.content_hash
    assert d15.content_hash == d16.content_hash

    nr = 10
    with TimeContext(f'creating {nr} docs without hashing content at init'):
        da = DocumentArray()
        for _ in range(nr):
            d = Document(content='text' * 2)
            da.append(d)

    with TimeContext(f'creating {nr} docs with hashing content at init'):
        da = DocumentArray()
        for _ in range(nr):
            d = Document(content='text' * 2)
            da.append(d)

        with TimeContext(f'iterating through docs with content hash'):
            for d in da:
                assert d.content_hash
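Because content_hash reflects only a Document's content fields (not its id, as the d4/d5 check above shows), it can serve for simple de-duplication; a minimal sketch under that assumption:

from jina import Document, DocumentArray

da = DocumentArray([Document(content='text'), Document(content='text'), Document(content='other')])

seen = set()
deduped = DocumentArray()
for d in da:
    if d.content_hash not in seen:  # same content => same hash, so duplicates are skipped
        seen.add(d.content_hash)
        deduped.append(d)

assert len(deduped) == 2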
Example No. 23
def test_delete_by_id(docarray: DocumentArray, document_factory):
    doc = document_factory.create(4, 'test 4')
    docarray.append(doc)
    del docarray[doc.id]
    assert len(docarray) == 3
    assert docarray == docarray
Example No. 24
    def no_polling(self, docs: DocumentArray, **kwargs):
        docs.append(Document(text='added'))
        return docs
Example No. 25
    def search(self, docs: DocumentArray, **kwargs):
        docs.append(Document(text='added'))
        return docs
Example No. 26
    def foo(self, docs: DocumentArray, **kwargs):
        self.logger.info(f'doc count {len(docs)}')
        docs.append(Document(text=self.name))
        return docs