Пример #1
0
    def rank(self, docs_matrix: List['DocumentArray'], parameters: Dict,
             **kwargs) -> 'DocumentArray':
        """
        Merge the matches of two modalities into one weighted ranking per query.

        Documents are taken position-wise from the two arrays in
        ``docs_matrix``. Each match score is multiplied by the ``weight`` of
        the document the match belongs to; matches sharing a ``parent_id``
        have their weighted scores summed. The merged matches are sorted by
        score, highest first, and truncated to ``top_k``.

        NOTE: when a match is stored as a new entry, its score is scaled in
        place on the incoming match before the match is copied.

        :param docs_matrix: list of :class:`DocumentArray`; it is unpacked as
            exactly two modalities per query
        :param parameters: must contain ``top_k`` (converted with ``int``)
        :param kwargs: not used (kept to maintain interface)
        :return: a :class:`DocumentArray` holding one document per query pair,
            whose matches are the merged top-k results
        """

        merged_results = DocumentArray()
        for first_doc, second_doc in zip(*docs_matrix):

            by_parent = {}  # type: Dict[str, Document]

            # First modality: every match becomes (or replaces) the entry of
            # its parent.
            for match in first_doc.matches:
                match.score.value *= first_doc.weight
                by_parent[match.parent_id] = Document(match, copy=True)

            # Second modality: accumulate onto a known parent, otherwise
            # register the match as a new entry.
            for match in second_doc.matches:
                if match.parent_id not in by_parent:
                    match.score.value *= second_doc.weight
                    by_parent[match.parent_id] = Document(match, copy=True)
                    continue
                by_parent[match.parent_id].score.value += (
                    match.score.value * second_doc.weight)

            candidates = DocumentArray(list(by_parent.values()))
            candidates.sort(key=lambda doc: doc.score.value, reverse=True)
            limit = int(parameters['top_k'])
            merged_results.append(Document(matches=candidates[:limit]))
        return merged_results
Пример #2
0
def test_da_sort_by_document_interface_in_proto():
    """Sorting works when the array was built from both Documents and protos.

    Ten documents with embeddings of length 10 down to 1 are created; every
    odd-indexed one is passed as its ``.proto``. After sorting by embedding
    length the shortest embedding must come first.
    """
    docs = [Document(embedding=np.array([1] * (10 - i))) for i in range(10)]
    # Even positions keep the Document, odd positions hand over the proto.
    mixed = [
        doc if idx % 2 == 0 else doc.proto
        for idx, doc in enumerate(docs)
    ]
    da = DocumentArray(mixed)
    assert len(da) == 10
    assert da[0].embedding.shape == (10, )

    def embedding_length(d):
        return d.embedding.dense.shape[0]

    da.sort(key=embedding_length)
    assert da[0].embedding.shape == (1, )
Пример #3
0
def test_da_sort_by_score():
    """Sorting by the ``euclid`` score moves the lowest score to the front.

    Document ``i`` is given the score ``10 - i``, so the array starts with
    id ``'0'`` (score 10) and, once sorted, must start with id ``'9'``
    (score 1).
    """
    documents = []
    for i in range(10):
        documents.append(Document(id=i, copy=True, scores={'euclid': 10 - i}))
    da = DocumentArray(documents)

    first = da[0]
    assert first.id == '0'
    assert da[0].scores['euclid'].value == 10

    def euclid_value(d):
        return d.scores['euclid'].value

    da.sort(key=euclid_value)  # order the documents by their score value
    assert da[0].id == '9'
    assert da[0].scores['euclid'].value == 1
Пример #4
0
def test_da_sort_topk_tie():
    """A top-k sort keeps tied documents in their original relative order.

    One hundred documents carry ``order = id % 10``, so ten of them tie on
    the smallest key ``0``. After ``sort(top_k=10)`` the first ten entries
    must be exactly those ten, in ascending id order.
    """
    da = DocumentArray(
        [Document(id=i, tags={'order': i % 10}) for i in range(100)])

    def order_tag(doc):
        return doc.tags['order']

    da.sort(top_k=10, key=order_tag)

    # ids 0, 10, ..., 90 are the documents whose 'order' tag is 0
    expected_ids = [str(i) for i in range(0, 100, 10)]
    top_k_ids = [doc.id for doc in da[0:10]]
    assert top_k_ids == expected_ids

    for position in range(10):
        assert da[position].tags['order'] == 0
Пример #5
0
    def rank(self, docs_matrix: List[DocumentArray], parameters: Dict, **kwargs):
        """
        Sum match scores across modalities, grouped by ``tags['root_doc_id']``.

        Entries of ``docs_matrix`` that are ``None`` or empty are ignored. The
        remaining arrays are walked position-wise; for every group of
        documents, matches that share a ``root_doc_id`` tag are collapsed into
        one document whose score is the sum of the individual scores. The
        collapsed documents are sorted by score, highest first, and cut to
        ``top_k``.

        :param docs_matrix: list of :class:`DocumentArray`, one per modality
        :param parameters: may contain ``top_k`` (converted with ``int``);
            3 is used when the key is absent
        :param kwargs: not used
        :return: a :class:`DocumentArray` with one document per position,
            whose matches are the collapsed top-k results
        """
        ranked = DocumentArray()

        # Drop modalities that delivered nothing.
        usable_arrays = []
        for doc_arr in docs_matrix:
            if doc_arr is None or len(doc_arr) == 0:
                continue
            usable_arrays.append(doc_arr)

        for docs_for_query in zip(*usable_arrays):
            by_root = {}
            for doc in docs_for_query:
                for match in doc.matches:
                    root_id = match.tags['root_doc_id']
                    if root_id not in by_root:
                        by_root[root_id] = Document(id=root_id, score=match.score)
                    else:
                        by_root[root_id].score.value += match.score.value

            candidates = DocumentArray(list(by_root.values()))
            candidates.sort(key=lambda doc: doc.score.value, reverse=True)
            limit = int(parameters.get('top_k', 3))
            ranked.append(Document(matches=candidates[:limit]))
        return ranked
Пример #6
0
def test_da_sort_topk():
    """``sort(top_k=3)`` orders only the first three entries.

    Checked in both directions: the first three scores must be the three
    smallest (or largest with ``reverse=True``) in order, the remaining seven
    must not be fully sorted, and the array must still hold the same
    documents as before.
    """
    da = DocumentArray(
        [Document(id=i, scores={'euclid': 10 - i}) for i in range(10)])
    original = deepcopy(da)

    def euclid(d):
        return d.scores['euclid'].value

    def split_scores(array):
        # Scores of the first three entries and of the remaining seven.
        values = [euclid(array[i]) for i in range(10)]
        return values[:3], values[3:]

    def same_documents(array):
        return len(array) == len(original) and all(
            d.id in original for d in array)

    # Ascending top-k.
    da.sort(top_k=3, key=euclid)
    top, rest = split_scores(da)
    assert top == [1, 2, 3]
    assert rest != sorted(rest)
    assert same_documents(da)

    # Descending top-k.
    da.sort(top_k=3, key=euclid, reverse=True)
    top, rest = split_scores(da)
    assert top == [10, 9, 8]
    assert rest != sorted(rest, reverse=True)
    assert same_documents(da)
Пример #7
0
    def rank(self, docs_matrix: List['DocumentArray'], parameters: Dict,
             **kwargs) -> 'DocumentArray':
        """
        Merge the matches of two modalities into a weighted ``relevance`` ranking.

        Documents are taken position-wise from the two arrays in
        ``docs_matrix``. For each match, ``relevance`` is its ``cosine`` score
        multiplied by the ``weight`` of the document the match belongs to;
        matches sharing a ``parent_id`` have their relevance summed. The
        merged matches are sorted by relevance, highest first, and truncated
        to ``top_k``.

        NOTE: when a match is stored as a new entry, ``relevance`` is written
        onto the incoming match itself before the match is copied.

        :param docs_matrix: list of :class:`DocumentArray`; it is unpacked as
            exactly two modalities per query
        :param parameters: must contain ``top_k`` (converted with ``int``),
            the number of results to keep
        :param kwargs: not used (kept to maintain interface)
        :return: a :class:`DocumentArray` holding one document per query pair,
            whose matches are the merged top-k results
        """

        merged_results = DocumentArray()
        for first_doc, second_doc in zip(*docs_matrix):

            by_parent = {}  # type: Dict[str, Document]

            # First modality: every match becomes (or replaces) the entry of
            # its parent.
            for match in first_doc.matches:
                match.scores['relevance'] = (
                    match.scores['cosine'].value * first_doc.weight)
                by_parent[match.parent_id] = Document(match, copy=True)

            # Second modality: add to a known parent's relevance, otherwise
            # register the match as a new entry.
            for match in second_doc.matches:
                if match.parent_id in by_parent:
                    entry = by_parent[match.parent_id]
                    previous = entry.scores['relevance'].value
                    entry.scores['relevance'] = previous + (
                        match.scores['cosine'].value * second_doc.weight)
                else:
                    match.scores['relevance'] = (
                        match.scores['cosine'].value * second_doc.weight)
                    by_parent[match.parent_id] = Document(match, copy=True)

            candidates = DocumentArray(list(by_parent.values()))
            candidates.sort(
                key=lambda doc: doc.scores['relevance'].value, reverse=True)
            limit = int(parameters['top_k'])
            merged_results.append(Document(matches=candidates[:limit]))
        return merged_results