Python Document示例

def test_convert_uri_to_data_uri(uri, mimetype):
    """Check that converting a URI to a data URI keeps the given MIME type.

    After ``convert_uri_to_datauri()`` the document's ``uri`` must start with
    ``data:<mimetype>`` and its ``mime_type`` must still equal ``mimetype``.
    """
    document = Document(uri=uri, mime_type=mimetype)
    document.convert_uri_to_datauri()

    expected_prefix = f'data:{mimetype}'
    assert document.uri.startswith(expected_prefix)
    assert document.mime_type == mimetype

示例#2

显示文件

文件： test_documentset.py 项目： strawberrypie/jina

def add_chunk(doc):
    """Create a new chunk, append it to ``doc.chunks`` and return it.

    The chunk's granularity is ``doc.granularity + 1`` and its adjacency is
    copied from ``doc``.
    """
    with Document() as child:
        next_granularity = doc.granularity + 1
        child.granularity = next_granularity
        child.adjacency = doc.adjacency
        # Append while the context manager is still open, then hand the
        # chunk back to the caller.
        doc.chunks.append(child)
        return child

示例#3

显示文件

 def index(self, docs, **kwargs):
     """Append five empty ``Document`` matches to every traversed document.

     Documents are taken from ``docs.traverse(self._traversal_paths)``;
     ``kwargs`` is accepted but not used.
     """
     matches_per_doc = 5
     for group in docs.traverse(self._traversal_paths):
         for item in group:
             for _ in range(matches_per_doc):
                 item.matches.append(Document())

示例#4

显示文件

def document_generator(num_doc):
    """Yield ``num_doc`` ``(document, groundtruth)`` pairs.

    Both members of each pair are separate ``Document`` objects created with
    the content ``'hello'``.
    """
    for _ in range(num_doc):
        pair = (Document(content='hello'), Document(content='hello'))
        yield pair

示例#5

显示文件

def test_crud_in_readme(mocker):
    """Run an index / search / update / delete round trip on one Flow.

    Four documents with 2-d embeddings and ``guardian`` / ``position`` tags
    are indexed, searched, updated, searched again, partially deleted and
    searched one last time.

    ``validate_callback`` is defined outside this block; presumably it runs
    the given ``validate`` function on the response captured by the mock —
    confirm against its definition.
    """
    docs = [
        Document(id='🐲',
                 embedding=np.array([0, 0]),
                 tags={
                     'guardian': 'Azure Dragon',
                     'position': 'East'
                 }),
        Document(id='🐦',
                 embedding=np.array([1, 0]),
                 tags={
                     'guardian': 'Vermilion Bird',
                     'position': 'South'
                 }),
        Document(id='🐢',
                 embedding=np.array([0, 1]),
                 tags={
                     'guardian': 'Black Tortoise',
                     'position': 'North'
                 }),
        Document(id='🐯',
                 embedding=np.array([1, 1]),
                 tags={
                     'guardian': 'White Tiger',
                     'position': 'West'
                 })
    ]

    # create: index all four documents; the on_done mock must fire exactly once
    mock = mocker.Mock()
    with Flow().add(uses='_index') as f:
        f.index(docs, on_done=mock)

    mock.assert_called_once()

    # read: query with docs[0] (embedding [0, 0]) and top_k=3
    def validate(req):
        assert len(req.docs[0].matches) == 3
        for match in req.docs[0].matches:
            # '🐯' (embedding [1, 1]) is expected to be left out of the top 3
            assert match.id != '🐯'
            assert 'position' in match.tags
            assert 'guardian' in match.tags
            assert match.score.ref_id == req.docs[0].id

    mock = mocker.Mock()

    # the same Flow object is re-entered for every following step
    with f:
        f.search(docs[0], top_k=3, on_done=mock)
    validate_callback(mock, validate)

    # update: move docs[0] to embedding [1, 1] and push the change
    mock = mocker.Mock()

    d = docs[0]
    d.embedding = np.array([1, 1])
    with f:
        f.update(d, on_done=mock)
    mock.assert_called_once()

    # search again with top_k=1 after the update

    def validate(req):
        assert len(req.docs[0].matches) == 1
        # NOTE(review): this line assigns the id rather than asserting
        # equality — possibly `assert ... == ...` was intended; confirm.
        req.docs[0].matches[0].id = req.docs[0].id
        # embeddings are removed in the CompoundIndexer via ExcludeQL
        np.testing.assert_array_equal(req.docs[0].matches[0].embedding,
                                      np.array(None))

    mock = mocker.Mock()

    with f:
        f.search(docs[0], top_k=1, on_done=mock)
    validate_callback(mock, validate)

    # delete: remove two of the four documents by id
    mock = mocker.Mock()

    with f:
        f.delete(['🐦', '🐲'], on_done=mock)
    mock.assert_called_once()

    # search again: top_k=4 is requested but only two matches are expected

    def validate(req):
        assert len(req.docs[0].matches) == 2

    mock = mocker.Mock()

    with f:
        f.search(docs[0], top_k=4, on_done=mock)
    validate_callback(mock, validate)

示例#6

显示文件

文件： test_pod.py 项目： srbhr/jina

async def async_inputs():
    """Asynchronously yield 20 documents with the text ``'client0-Request'``."""
    total_requests = 20
    for _ in range(total_requests):
        request_doc = Document(text='client0-Request')
        yield request_doc

示例#7

显示文件

文件： test_curd_readme.py 项目： tawabshakeel/jina

def test_crud_in_readme(mocker):
    """Run an index / search / update / delete round trip on one Flow.

    Four documents with 2-d embeddings and ``guardian`` / ``position`` tags
    are indexed, searched, updated, searched again, partially deleted and
    searched one last time. Each step only asserts that its ``on_done``
    mock was called exactly once.

    NOTE(review): the ``validate`` functions are passed as
    ``mocker.Mock(wrap=validate)``. ``unittest.mock.Mock`` spells that
    parameter ``wraps``; with ``wrap=`` the function appears to be stored as
    a plain attribute and never invoked, so the assertions inside
    ``validate`` likely do not run — confirm before relying on them.
    """
    docs = [
        Document(id='🐲',
                 embedding=np.array([0, 0]),
                 tags={
                     'guardian': 'Azure Dragon',
                     'position': 'East'
                 }),
        Document(id='🐦',
                 embedding=np.array([1, 0]),
                 tags={
                     'guardian': 'Vermilion Bird',
                     'position': 'South'
                 }),
        Document(id='🐢',
                 embedding=np.array([0, 1]),
                 tags={
                     'guardian': 'Black Tortoise',
                     'position': 'North'
                 }),
        Document(id='🐯',
                 embedding=np.array([1, 1]),
                 tags={
                     'guardian': 'White Tiger',
                     'position': 'West'
                 })
    ]

    # create: index all four documents; the on_done mock must fire exactly once
    m = mocker.Mock()
    with Flow().add(uses='_index') as f:
        f.index(docs, on_done=m)

    m.assert_called_once()

    # read: query with docs[0] (embedding [0, 0]) and top_k=3
    def validate(req):
        assert len(req.docs[0].matches) == 3
        # `m` here is local to validate and does not rebind the outer mock
        for m in req.docs[0].matches:
            assert m.id != '🐯'
            assert 'position' in m.tags
            assert 'guardian' in m.tags

    m = mocker.Mock(wrap=validate)

    # the same Flow object is re-entered for every following step
    with f:
        f.search(docs[0], top_k=3, on_done=m)
    m.assert_called_once()

    # update: move docs[0] to embedding [1, 1] and push the change
    m = mocker.Mock()

    d = docs[0]
    d.embedding = np.array([1, 1])
    with f:
        f.update(d, on_done=m)
    m.assert_called_once()

    # search again with top_k=1 after the update

    def validate(req):
        assert len(req.docs[0].matches) == 1
        # NOTE(review): this line assigns the id rather than asserting
        # equality — possibly `assert ... == ...` was intended; confirm.
        req.docs[0].matches[0].id = req.docs[0].id
        np.testing.assert_array_equal(req.docs[0].matches[0].embedding,
                                      docs[0].embedding)

    m = mocker.Mock(wrap=validate)

    with f:
        f.search(docs[0], top_k=1, on_done=m)
    m.assert_called_once()

    # delete: remove two of the four documents by id
    m = mocker.Mock()

    with f:
        f.delete(['🐦', '🐲'], on_done=m)
    m.assert_called_once()

    # search again: top_k=4 is requested but only two matches are expected

    def validate(req):
        assert len(req.docs[0].matches) == 2

    m = mocker.Mock(wrap=validate)

    with f:
        f.search(docs[0], top_k=4, on_done=m)
    m.assert_called_once()

示例#8

显示文件

 def craft(self, docs: DocumentArray, **kwargs):
     """Append to each document a chunk built from the document itself.

     The chunk is constructed as ``Document(doc, copy=True, ...)`` and
     carries the source document's id in its ``root_doc_id`` tag. The same
     ``docs`` object is returned.
     """
     for parent in docs:
         root_tags = {'root_doc_id': parent.id}
         duplicate = Document(parent, copy=True, tags=root_tags)
         parent.chunks.append(duplicate)
     return docs

示例#9

显示文件

 def craft(self, docs: DocumentArray, **kwargs):
     """Convert each document's image URI to a blob and add it as a chunk.

     After ``convert_image_uri_to_blob()`` a new ``Document`` holding the
     document's blob, with MIME type ``'image/*'``, is appended to its
     chunks. The same ``docs`` object is returned.
     """
     for parent in docs:
         parent.convert_image_uri_to_blob()
         image_chunk = Document(blob=parent.blob, mime_type='image/*')
         parent.chunks.append(image_chunk)
     return docs

示例#10

显示文件

文件： test_converters.py 项目： paddlelaw/jina

def test_image_normalize(shape, channel_axis):
    """Check that image blob normalization keeps the shape and yields float32.

    A random ``uint8`` array of the given ``shape`` is used as document
    content; after ``set_image_blob_normalization`` the blob must be
    3-dimensional, have the same shape and have dtype ``float32``.
    """
    pixels = np.random.randint(0, 255, shape, dtype=np.uint8)
    doc = Document(content=pixels)
    doc.set_image_blob_normalization(channel_axis=channel_axis)

    normalized = doc.blob
    assert normalized.ndim == 3
    assert normalized.shape == shape
    assert normalized.dtype == np.float32

示例#11

显示文件

def test_simple_routing():
    """Post one empty document to ``/index`` and check the returned text.

    The Flow uses ``SimplExecutor``; the first document of the first result
    must have the text ``'Hello World!'``.
    """
    flow = Flow().add(uses=SimplExecutor)
    with flow:
        responses = flow.post(
            on='/index',
            inputs=[Document()],
            return_results=True,
        )
        first_doc = responses[0].docs[0]
        assert first_doc.text == 'Hello World!'

示例#12

显示文件

文件： test_converters.py 项目： paddlelaw/jina

def test_datauri_to_blob():
    """Convert the URI of ``test.png`` to a data URI and check the result.

    The blob must stay falsy and the MIME type must be ``'image/png'``.
    """
    image_path = os.path.join(cur_dir, 'test.png')
    doc = Document(uri=image_path)
    doc.convert_uri_to_datauri()

    assert not doc.blob
    assert doc.mime_type == 'image/png'

示例#13

显示文件

文件： test_converters.py 项目： paddlelaw/jina

def test_uri_to_blob():
    """Load ``test.png`` into an image blob and check type, MIME and shape."""
    image_path = os.path.join(cur_dir, 'test.png')
    doc = Document(uri=image_path)
    doc.convert_uri_to_image_blob()

    assert isinstance(doc.blob, np.ndarray)
    assert doc.mime_type == 'image/png'
    # Expected layout is (height, width, channels).
    expected_shape = (85, 152, 3)
    assert doc.blob.shape == expected_shape

示例#14

显示文件

文件： test_converters.py 项目： paddlelaw/jina

def test_glb_converters():
    """Convert ``test.glb`` to a point-cloud blob and check its shape.

    ``convert_uri_to_point_cloud_blob`` is called with 2000 and the
    resulting blob must have shape ``(2000, 3)``.
    """
    sample_count = 2000
    model_path = os.path.join(cur_dir, 'test.glb')
    doc = Document(uri=model_path)
    doc.convert_uri_to_point_cloud_blob(sample_count)
    assert doc.blob.shape == (sample_count, 3)

示例#15

显示文件

文件： test_sharding.py 项目： yuanl/jina

def random_docs(start, end, embed_dim=10):
    """Yield one randomly filled document per integer in ``range(start, end)``.

    Each document gets the integer as a zero-padded 16-character id and as
    ``tags['id']``, five chunks that share the parent's id, a random text of
    ten lowercase ASCII letters encoded as UTF-8 bytes, and a random
    embedding created with ``np.random.random([embed_dim])``.
    """

    def _random_text():
        # Ten random lowercase ASCII letters, joined and encoded to bytes.
        letters = (random.choice(string.ascii_lowercase) for _ in range(10))
        return ''.join(letters).encode('utf8')

    for number in range(start, end):
        padded_id = f'{number:0>16}'
        parent = Document()
        parent.id = padded_id
        parent.tags['id'] = number
        for _ in range(5):
            chunk = Document()
            chunk.id = padded_id
            # NOTE(review): text and embedding are written to the parent
            # here, not to the chunk, so chunks are appended with only an
            # id set — possibly unintended; confirm before changing.
            parent.text = _random_text()
            parent.embedding = np.random.random([embed_dim])
            parent.chunks.append(chunk)
        parent.text = _random_text()
        parent.embedding = np.random.random([embed_dim])
        yield parent

示例#16

显示文件

 def segment(self, docs: DocumentArray, **kwargs):
     """Split each document's text on newlines into plain-text chunks.

     Every line becomes a ``Document`` with MIME type ``'text/plain'`` that
     copies the parent's ``root_doc_id`` tag; the chunks are added to the
     parent with ``+=``. The same ``docs`` object is returned.
     """
     for parent in docs:
         line_chunks = [
             Document(
                 text=line,
                 mime_type='text/plain',
                 tags={'root_doc_id': parent.tags['root_doc_id']},
             )
             for line in parent.text.split('\n')
         ]
         parent.chunks += line_chunks
     return docs

示例#17

显示文件

文件： test_flow.py 项目： smarthi/jina

def test_single_document_flow_index():
    """Index a single document, both directly and through a callable."""
    single_doc = Document()

    def supply_doc():
        return single_doc

    with Flow().add() as flow:
        flow.index(single_doc)
        flow.index(supply_doc)

示例#18

显示文件

文件： test_crud.py 项目： srbhr/jina

def test_crud(tmpdir, rest):
    """Index, search, delete and update documents through ``flow.yml``.

    ``rest`` selects the transport: ``rest_post`` when truthy, otherwise
    ``Client.post``. Every phase loads a fresh Flow from ``'flow.yml'``;
    presumably state persists between phases through the ``WORKSPACE``
    directory — confirm against the flow configuration.

    ``random_docs``, ``rest_post`` and ``PARAMS`` are defined outside this
    block.
    """
    # configure the flow through environment variables
    os.environ['RESTFUL'] = 'http' if rest else 'grpc'
    os.environ['WORKSPACE'] = str(tmpdir)

    # phase 1: index ten documents
    with Flow.load_config('flow.yml') as f:
        c = Client(port=f.port_expose)
        original_docs = list(random_docs(10, chunks_per_doc=0))
        if rest:
            rest_post(f, 'index', original_docs)
        else:
            c.post(
                on='/index',
                inputs=original_docs,
            )

    # phase 2: search with one document and expect all ten as matches
    with Flow.load_config('flow.yml') as f:
        c = Client(port=f.port_expose)
        inputs = list(random_docs(1))
        if rest:
            results = rest_post(f, 'search', inputs)
            matches = results['data'][0]['matches']
            for doc in results['data']:
                assert Document.from_dict(doc).text == 'hello world'
        else:
            results = c.post(on='/search',
                             inputs=inputs,
                             parameters=PARAMS,
                             return_results=True)
            matches = results[0].docs[0].matches
            for doc in results[0].docs:
                assert doc.text == 'hello world'

        assert len(matches) == 10

    # phase 3: send five documents to the delete endpoint
    with Flow.load_config('flow.yml') as f:
        c = Client(port=f.port_expose)
        inputs = list(random_docs(5, chunks_per_doc=0))

        if rest:
            rest_post(f, 'delete', inputs)

        else:
            c.post(on='/delete', inputs=inputs)

    # phase 4: search again and expect five matches to remain
    with Flow.load_config('flow.yml') as f:
        c = Client(port=f.port_expose)
        inputs = list(random_docs(1))

        if rest:
            results = rest_post(f, 'search', inputs)
            matches = results['data'][0]['matches']

        else:
            results = c.post(on='/search',
                             inputs=inputs,
                             parameters=PARAMS,
                             return_results=True)
            matches = results[0].docs[0].matches

        assert len(matches) == 5

    # phase 5: update with five documents created with start_id=5, five
    # chunks each and the text 'hello again'
    updated_docs = list(
        random_docs(5, chunks_per_doc=5, start_id=5, text='hello again'))

    with Flow.load_config('flow.yml') as f:
        c = Client(port=f.port_expose)
        if rest:
            rest_post(f, 'update', updated_docs)
        else:
            c.post(on='/update', inputs=updated_docs)

    # phase 6: search once more and compare the matches with the updates
    with Flow.load_config('flow.yml') as f:
        c = Client(port=f.port_expose)
        inputs = list(random_docs(1))
        if rest:
            results = rest_post(f, 'search', inputs)
            # REST results are dicts, so sort on the 'id' key
            matches = sorted(results['data'][0]['matches'],
                             key=lambda match: match['id'])
        else:
            results = c.post(on='/search',
                             inputs=inputs,
                             parameters=PARAMS,
                             return_results=True)
            matches = sorted(results[0].docs[0].matches,
                             key=lambda match: match.id)

        assert len(matches) == 5

        # matches are sorted by id and paired positionally with updated_docs
        for match, updated_doc in zip(matches, updated_docs):
            if isinstance(match, dict):
                # normalize REST dicts to Document for the checks below
                match = Document.from_dict(match)

            assert updated_doc.id == match.id
            assert updated_doc.text == match.text
            np.testing.assert_array_equal(updated_doc.embedding,
                                          match.embedding)
            assert len(match.chunks) == 5
            assert len(match.chunks) == len(updated_doc.chunks)
            for match_chunk, updated_doc_chunk in zip(match.chunks,
                                                      updated_doc.chunks):
                assert match_chunk.text == updated_doc_chunk.text
                np.testing.assert_array_equal(match_chunk.embedding,
                                              updated_doc_chunk.embedding)

示例#19

显示文件

 def add(self, **kwargs):
     """Return a ``DocumentArray`` of 100 documents.

     Every document has the text ``'executor was here'``; ``kwargs`` is
     accepted but not used.
     """
     generated = [Document(text='executor was here') for _ in range(100)]
     return DocumentArray(generated)

示例#20

显示文件

文件： test_traverse.py 项目： paddlelaw/jina

def test_traverse_flat_root_itself():
    """Check that ``traverse_flat('r')`` returns the very same array object."""
    documents = DocumentArray([Document() for _ in range(100)])
    flattened = documents.traverse_flat('r')
    # Identity check: both names must refer to one object.
    assert flattened is documents

示例#21

显示文件

def get_doc(i):
    """Build a document with text ``'doc {i}'`` and embedding ``[i] * 5``."""
    label = f'doc {i}'
    vector = np.array([i] * 5)
    return Document(text=label, embedding=vector)

示例#22

显示文件

文件： test_crud_corrupted_docs.py 项目： Hravan/jina

def random_docs_only_tags(nr_docs, start=0):
    """Yield ``nr_docs`` documents that carry only tags.

    For each integer ``j`` from ``start`` on, the document gets
    ``tags['id'] = j`` and ``tags['something'] = 'abcdef {j}'``.
    """
    stop = nr_docs + start
    for number in range(start, stop):
        tagged = Document()
        tagged.tags['id'] = number
        tagged.tags['something'] = f'abcdef {number}'
        yield tagged

示例#23

显示文件

文件： test_integration.py 项目： strawberrypie/jina

def doc_to_index():
    """Return a new document whose text is ``'test'``."""
    indexed = Document()
    indexed.text = 'test'
    return indexed

示例#24

显示文件

文件： test_crud_corrupted_docs.py 项目： Hravan/jina

def random_docs_with_shapes(nr_docs, emb_shape, start=0):
    """Yield ``nr_docs`` documents with random embeddings of ``emb_shape``.

    Ids are the integers from ``start`` up to ``start + nr_docs - 1``; each
    document is yielded after its ``with`` block has been left.
    """
    stop = nr_docs + start
    for doc_id in range(start, stop):
        with Document() as item:
            item.id = doc_id
            item.embedding = np.random.random(emb_shape)
        yield item

示例#25

显示文件

文件： test_documentset.py 项目： strawberrypie/jina

 def create(self, idx, text):
     """Build a document with id ``idx``, ``tags['id'] = idx`` and ``text``.

     The document is returned after its ``with`` block has been left.
     """
     with Document(id=idx) as created:
         created.tags['id'] = idx
         created.text = text
     return created

示例#26

显示文件

def docs():
    """Return a one-element list with a document tagged with ``INPUT_TAGS``."""
    tagged = Document()
    tagged.tags = INPUT_TAGS
    return [tagged]

示例#27

显示文件

文件： test_documentset.py 项目： strawberrypie/jina

def add_match(doc):
    """Create a new match, add it to ``doc.matches`` and return it.

    The match copies ``doc``'s granularity and has an adjacency of
    ``doc.adjacency + 1``.
    """
    with Document() as neighbour:
        neighbour.granularity = doc.granularity
        next_adjacency = doc.adjacency + 1
        neighbour.adjacency = next_adjacency
        # Add while the context manager is still open, then hand the match
        # back to the caller.
        doc.matches.add(neighbour)
        return neighbour

示例#28

显示文件

def test_copy_tags(docs):
    """Check that a document built from another's tags has equal tags."""
    for original in docs:
        clone = Document(tags=original.tags)
        assert clone.tags == original.tags

示例#29

显示文件

def doc_with_multimodal_chunks_wrong(embeddings):
    """Build a ``MultimodalDocument`` with three chunks.

    The chunks have the modalities ``'visual'``, ``'visual'`` and
    ``'textual'`` and take ``embeddings[0]``, ``embeddings[1]`` and
    ``embeddings[2]`` respectively. Ids are updated on the chunks and then
    on the document before the chunks are appended.
    """
    doc = MultimodalDocument()
    parts = [Document() for _ in range(3)]
    modalities = ('visual', 'visual', 'textual')

    # Same phase order as before: all modalities, then all embeddings.
    for part, modality in zip(parts, modalities):
        part.modality = modality
    for position, part in enumerate(parts):
        part.embedding = embeddings[position]

    # Ids are refreshed on the chunks first, then on the parent document.
    for part in parts:
        part.update_id()
    doc.update_id()

    for part in parts:
        doc.chunks.append(part)
    return doc

示例#30

显示文件

文件： test_converters.py 项目： paddlelaw/jina

def test_convert_content_to_uri():
    """Check that ``convert_content_to_uri`` raises for array content.

    The document is created from a random 10x10 array and the call must
    raise ``NotImplementedError``.
    """
    matrix = np.random.random([10, 10])
    array_doc = Document(content=matrix)
    with pytest.raises(NotImplementedError):
        array_doc.convert_content_to_uri()