Python _doc_without_embedding示例

编程语言: Python

命名空间/包名称: jina.drivers.dbms

方法/功能: _doc_without_embedding

hotexamples.com的示例: 3

Python _doc_without_embedding - 共找到3个示例。以下是从开源项目中提取的、评分最高的 jina.drivers.dbms._doc_without_embedding 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

示例#1

显示文件

def test_dbms_keyvalue(tmpdir, test_metas):
    """Check add, update and delete on a key-value DBMS indexer.

    The indexer is first created with ``KeyValueDBMSIndexer`` and filled with
    ten documents; it is then re-opened twice through ``BaseDBMSIndexer.load``
    using the path read from ``save_abspath`` -- once to update the entries
    and once to delete them -- with ``indexer.size`` asserted after each step.
    """

    def _index_columns(documents):
        # Build one (id, embedding, serialized doc without embedding) row per
        # document, then transpose the rows into three parallel tuples.
        rows = [
            (d.id, d.embedding, _doc_without_embedding(d).SerializeToString())
            for d in documents
        ]
        return zip(*rows)

    original_docs = list(get_documents(chunks=False, nr=10, same_content=True))
    doc_ids, embeddings, payloads = _index_columns(original_docs)

    save_path = None
    with KeyValueDBMSIndexer(index_filename='dbms', metas=test_metas) as indexer:
        indexer.add(doc_ids, embeddings, payloads)
        assert indexer.size == len(original_docs)
        # Remember where the indexer lives so it can be re-opened below.
        save_path = indexer.save_abspath

    replacement_docs = list(
        get_documents(chunks=False, nr=10, same_content=False))
    doc_ids, embeddings, payloads = _index_columns(replacement_docs)

    # Updating must leave the number of stored entries unchanged.
    # NOTE(review): presumably both batches share the same ids -- confirm
    # against get_documents.
    with BaseDBMSIndexer.load(save_path) as indexer:
        indexer.update(doc_ids, embeddings, payloads)
        assert indexer.size == len(original_docs)

    # Deleting every original id must empty the indexer.
    with BaseDBMSIndexer.load(save_path) as indexer:
        indexer.delete([d.id for d in original_docs])
        assert indexer.size == 0

示例#2

显示文件

文件： test_dump_dbms.py 项目： shishirbh/jina

def assert_dump_data(dump_path, docs, shards, pea_id):
    """Assert that the dump for one pea matches its expected slice of ``docs``.

    ``docs`` is split into ``shards`` consecutive slices of
    ``len(docs) // shards`` items each; the last pea (``pea_id == shards - 1``)
    additionally receives the remainder. The ids, vectors and metas read back
    from ``dump_path`` for ``pea_id`` are compared against that slice, and the
    query indexers loaded from the YAML configs must report the same size.

    :param dump_path: location of the dump, passed to the import helpers and
        to the indexers' ``metas``
    :param docs: the full, ordered collection of documents that was dumped
    :param shards: total number of shards the dump was split into
    :param pea_id: index of the shard/pea being checked
    """
    size_shard, size_shard_modulus = divmod(len(docs), shards)
    ids_dump, vectors_dump = import_vectors(dump_path, str(pea_id))

    # Slice bounds for this pea; the last pea also takes the leftover docs.
    start = pea_id * size_shard
    stop = start + size_shard
    if pea_id == shards - 1:
        stop += size_shard_modulus
    docs_expected = docs[start:stop]
    print(f'### pea {pea_id} has {len(docs_expected)} docs')

    # Materialize both iterables before comparing them.
    dumped_ids = list(ids_dump)
    dumped_vectors = list(vectors_dump)
    expected_ids = [d.id for d in docs_expected]
    expected_vectors = [d.embedding for d in docs_expected]
    np.testing.assert_equal(dumped_ids, expected_ids)
    np.testing.assert_allclose(dumped_vectors, expected_vectors)

    _, metas_dump = import_metas(dump_path, str(pea_id))
    dumped_metas = list(metas_dump)
    expected_metas = [
        _doc_without_embedding(d).SerializeToString() for d in docs_expected
    ]
    np.testing.assert_equal(dumped_metas, expected_metas)

    # assert with Indexers
    # TODO currently metas are only passed to the parent Compound, not to the inner components
    compound_metas = {
        'workspace': os.path.join(dump_path, 'new_ws'),
        'dump_path': dump_path,
    }
    with TimeContext(f'### reloading {len(docs_expected)}'):
        # noinspection PyTypeChecker
        compound: CompoundQueryExecutor = BaseQueryIndexer.load_config(
            'indexer_query.yml', pea_id=pea_id, metas=compound_metas)
    for component in compound.components:
        assert component.size == len(docs_expected)

    # test with the inner indexers separate from the Compound
    standalone_configs = ['basic/query_np.yml', 'basic/query_kv.yml']
    for i, indexer_file in enumerate(standalone_configs):
        # Each standalone indexer gets its own workspace directory.
        workspace = os.path.realpath(os.path.join(dump_path, f'new_ws-{i}'))
        indexer = BaseQueryIndexer.load_config(
            indexer_file,
            pea_id=pea_id,
            metas={'workspace': workspace, 'dump_path': dump_path},
        )
        assert indexer.size == len(docs_expected)

示例#3

显示文件

文件： test_dump_dbms.py 项目： tapasag/jina

 def _validate_results_nonempty(resp):
     """Assert that a search response carries the expected, populated matches.

     Reads ``nr_search``, ``nr_docs`` and ``emb_size`` from the enclosing
     scope. Every document in ``resp.docs`` must have matches, and each match
     must have an embedding whose first dimension is ``emb_size``, be
     serializable without its embedding, and contain the expected text and
     tag values.
     """
     assert len(resp.docs) == nr_search
     for doc in resp.docs:
         match_count = len(doc.matches)
         if nr_docs >= 10:
             # TODO does it return all of them no matter how many?
             assert match_count > 0
         else:
             assert match_count == nr_docs
         for match in doc.matches:
             assert match.embedding.shape[0] == emb_size
             assert _doc_without_embedding(match).SerializeToString() is not None
             assert 'hello world' in match.text
             assert 'tag data' in match.tags['tag_field']