Пример #1
0
def test_multimodal_document_fail_bad_doctype(visual_embedding):
    """Expect ``BadDocType`` when working with a document that has no chunks.

    The document only receives a tag and an embedding; no chunks are attached.
    The whole sequence, ending with the ``modality_content_mapping`` access,
    runs inside ``pytest.raises``.
    """
    with pytest.raises(BadDocType):
        document = MultimodalDocument()
        document.tags['id'] = 1
        document.embedding = visual_embedding
        # NOTE(review): presumably this property access is the statement that
        # raises -- confirm against the MultimodalDocument implementation.
        document.modality_content_mapping
Пример #2
0
def test_from_content_category_mapping(modality_content_mapping):
    """Build a :class:`MultimodalDocument` from a modality/content mapping.

    Checks that the resulting document exposes two modalities (``'visual'``
    and ``'textual'``), holds two chunks, and sits one granularity level
    below its first chunk.

    :param modality_content_mapping: fixture passed straight to
        ``MultimodalDocument.from_modality_content_mapping``.
    """
    md = MultimodalDocument.from_modality_content_mapping(
        modality_content_mapping=modality_content_mapping)
    assert len(md.modalities) == 2
    # Each modality is checked separately: ``'visual' and 'textual' in x``
    # evaluates to ``'textual' in x`` and never tests for 'visual'.
    assert 'visual' in md.modalities
    assert 'textual' in md.modalities
    assert len(md.chunks) == 2
    assert md.granularity == md.chunks[0].granularity - 1
Пример #3
0
def test_from_chunks_granularity_2(chunk_5, chunk_6):
    """Build a :class:`MultimodalDocument` from two chunks of granularity 3.

    Checks that the document exposes two modalities (``'visual'`` and
    ``'textual'``), keeps both chunks, and sits one granularity level below
    its first chunk, whose granularity is asserted to be 3.

    :param chunk_5: first chunk fixture.
    :param chunk_6: second chunk fixture.
    """
    md = MultimodalDocument(chunks=[chunk_5, chunk_6])
    assert len(md.modalities) == 2
    # Each modality is checked separately: ``'visual' and 'textual' in x``
    # evaluates to ``'textual' in x`` and never tests for 'visual'.
    assert 'visual' in md.modalities
    assert 'textual' in md.modalities
    assert len(md.chunks) == 2
    assert md.granularity == md.chunks[0].granularity - 1
    assert md.chunks[0].granularity == 3
Пример #4
0
def multimodal_generator():
    """Yield five ``MultimodalDocument`` objects, one per index 0..4.

    Each document is built from a two-entry ``modality_content_map`` whose
    keys are ``'1'`` and ``'2'`` and whose values embed the current index.
    """
    for index in range(5):
        content_by_modality = {
            '1': f'aaa {index}',
            '2': f'bbb {index}',
        }
        yield MultimodalDocument(modality_content_map=content_by_modality)
Пример #5
0
def test_from_chunks_success(chunk_1, chunk_2):
    """Build a :class:`MultimodalDocument` from two chunks of granularity 1.

    Checks that the document exposes two modalities (``'visual'`` and
    ``'textual'``), keeps both chunks, and sits one granularity level below
    its first chunk, whose granularity is asserted to be 1.

    :param chunk_1: first chunk fixture.
    :param chunk_2: second chunk fixture.
    """
    md = MultimodalDocument(chunks=[chunk_1, chunk_2])
    assert len(md.modalities) == 2
    # Each modality is checked separately: ``'visual' and 'textual' in x``
    # evaluates to ``'textual' in x`` and never tests for 'visual'.
    assert 'visual' in md.modalities
    assert 'textual' in md.modalities
    assert len(md.chunks) == 2
    assert md.granularity == md.chunks[0].granularity - 1
    assert md.chunks[0].granularity == 1
Пример #6
0
def query_generator(image_paths, text_queries):
    """Yield one ``MultimodalDocument`` per (image path, text query) pair.

    The two iterables are walked in lockstep with ``zip``, so iteration stops
    at the shorter one. Each image file is read fully in binary mode and
    closed before the document is yielded.

    :param image_paths: iterable of paths to image files.
    :param text_queries: iterable of text queries paired with the images.
    """
    for path, query in zip(image_paths, text_queries):
        with open(path, 'rb') as image_file:
            image_bytes = image_file.read()
        content_by_modality = {'image': image_bytes, 'text': query}
        yield MultimodalDocument(modality_content_map=content_by_modality)
Пример #7
0
def multimodal_document(chunk_1, chunk_2):
    """Yield a ``MultimodalDocument`` populated with a tag, text, embedding and two chunks.

    The document is created through its context manager and yielded from
    inside the ``with`` block. Per the original notes, ``chunk_1`` is a
    document with an embedding and no content, and ``chunk_2`` is a document
    with content and no embedding.
    """
    with MultimodalDocument() as document:
        document.tags['id'] = 1
        document.text = b'hello world'
        # ``randint(0, 1)`` has an exclusive upper bound, so it always
        # contributes 0 and the embedding length is 10.
        embedding_size = 10 + np.random.randint(0, 1)
        document.embedding = np.random.random([embedding_size])
        for chunk in (chunk_1, chunk_2):
            document.chunks.add(chunk)
        yield document
Пример #8
0
def doc_with_multimodal_chunks(embeddings):
    """Return a ``MultimodalDocument`` holding three chunks with distinct modalities.

    The chunks get the modalities ``'visual1'``, ``'visual2'`` and
    ``'textual'`` and the embeddings ``embeddings[0]``, ``embeddings[1]`` and
    ``embeddings[2]`` respectively. Every chunk id and then the parent id is
    updated before the chunks are appended to the parent.

    :param embeddings: indexable collection with at least three embeddings.
    """
    doc = MultimodalDocument()
    modalities = ('visual1', 'visual2', 'textual')
    chunks = [Document() for _ in modalities]

    for chunk, modality in zip(chunks, modalities):
        chunk.modality = modality
    # Index explicitly (rather than zip) so a too-short ``embeddings``
    # still fails with an IndexError.
    for position, chunk in enumerate(chunks):
        chunk.embedding = embeddings[position]

    # Ids are refreshed before the chunks are attached to the parent.
    for chunk in chunks:
        chunk.update_id()
    doc.update_id()

    for chunk in chunks:
        doc.chunks.append(chunk)
    return doc
Пример #9
0
def test_from_documents_set():
    """Wrap multimodal documents in a ``MultimodalDocumentSet`` and check their chunks.

    Three documents are built from two-modality mappings. The set is created
    once from the plain list and once from a ``DocumentSet`` of that list; in
    both cases every iterated document must have exactly two chunks.
    """
    docs = [
        MultimodalDocument.from_modality_content_mapping(
            {'modA': f'textA {i}', 'modB': f'textB {i}'})
        for i in range(3)
    ]

    # Built directly from a list of documents.
    for from_list in MultimodalDocumentSet(docs):
        assert len(from_list.chunks) == 2

    # Built from an intermediate DocumentSet.
    for from_set in MultimodalDocumentSet(DocumentSet(docs)):
        assert len(from_set.chunks) == 2
Пример #10
0
def test_from_chunks_fail_multiple_granularity(chunk_1, chunk_2, chunk_4):
    """Expect ``BadDocType`` from :meth:`MultimodalDocument.from_chunks` for mixed granularity.

    Three chunks are passed in; per the original description they do not all
    share the same granularity value, which ``from_chunks`` is expected to
    reject.
    """
    mixed_granularity_chunks = [chunk_1, chunk_2, chunk_4]
    with pytest.raises(BadDocType):
        MultimodalDocument.from_chunks(chunks=mixed_granularity_chunks)
Пример #11
0
def test_from_chunks_fail_length_mismatch(chunk_1, chunk_2, chunk_3):
    """Expect ``LengthMismatchException`` from :meth:`MultimodalDocument.from_chunks`.

    Three chunks are passed in; per the original description they cover only
    two modalities, which ``from_chunks`` is expected to reject.
    """
    mismatched_chunks = [chunk_1, chunk_2, chunk_3]
    with pytest.raises(LengthMismatchException):
        MultimodalDocument.from_chunks(chunks=mismatched_chunks)
Пример #12
0
def test_from_chunks_fail_length_mismatch(chunk_1, chunk_2, chunk_3):
    """Check that a document built from mismatched chunks reports itself invalid.

    Three chunks are passed to the constructor; per the original description
    they cover only two modalities. The resulting document's ``is_valid``
    must be falsy.
    """
    md = MultimodalDocument(chunks=[chunk_1, chunk_2, chunk_3])
    assert not md.is_valid
Пример #13
0
def test_assert_granularity(chunk_1, chunk_6):
    """Expect ``BadDocType`` when constructing a document from ``chunk_1`` and ``chunk_6``.

    NOTE(review): judging by the test name, the two chunk fixtures presumably
    differ in granularity -- confirm against the fixture definitions.
    """
    chunks = [chunk_1, chunk_6]
    with pytest.raises(BadDocType):
        # The constructed object is never used; only the raise matters.
        MultimodalDocument(chunks=chunks)
Пример #14
0
def test_multimodal_document_fail_bad_doctype(visual_embedding):
    """Check that a document with no chunks reports itself invalid.

    The document only receives a tag and an embedding; no chunks are attached,
    and its ``is_valid`` must be falsy.
    """
    document = MultimodalDocument()
    document.tags['id'] = 1
    document.embedding = visual_embedding
    assert not document.is_valid