def input_index_data(num_docs=None, batch_size=8, dataset_type='f30k'):
    """Yield an image ``Document`` followed by its caption ``Document`` per pair.

    A data loader is built over the ``test`` split of the dataset located
    under ``cur_dir``. For every ``(image, caption)`` pair in every batch two
    documents are yielded, image first:

    * the image document carries the raw image in ``buffer`` with modality
      ``'image'`` and mime type ``'image/jpeg'``;
    * the caption document is created with ``id`` set to the SHA-1 hex digest
      of the image, carries the caption in ``text`` with modality ``'text'``
      and mime type ``'text/plain'``, and stores the caption again under
      ``tags['id']``.

    :param num_docs: when truthy, iteration stops after the first batch for
        which ``(batch_index + 1) * batch_size >= num_docs``. Batches are
        always emitted in full, and each pair contributes two documents.
    :param batch_size: forwarded to ``get_data_loader`` and used in the
        stopping check above.
    :param dataset_type: selects the captions file
        (``'dataset_flickr30k.json'`` for ``'f30k'``, otherwise
        ``'captions.txt'``) and the base folder (``'.'`` for ``'toy-data'``,
        otherwise ``'data'``).
    """
    captions_file = 'captions.txt'
    if dataset_type == 'f30k':
        captions_file = 'dataset_flickr30k.json'
    base_folder = '.' if dataset_type == 'toy-data' else 'data'
    dataset_dir = f'{base_folder}/{dataset_type}'

    loader = get_data_loader(
        root=os.path.join(cur_dir, f'{dataset_dir}/images'),
        captions=os.path.join(cur_dir, f'{dataset_dir}/{captions_file}'),
        split='test',
        batch_size=batch_size,
        dataset_type=dataset_type,
    )

    for batch_index, (images, batch_captions) in enumerate(loader):
        for image, caption in zip(images, batch_captions):
            # The digest of the image becomes the id of the caption document.
            image_digest = hashlib.sha1(image).hexdigest()

            image_doc = Document()
            image_doc.buffer = image
            image_doc.modality = 'image'
            image_doc.mime_type = 'image/jpeg'

            caption_doc = Document(id=image_digest)
            caption_doc.text = caption
            caption_doc.modality = 'text'
            caption_doc.mime_type = 'text/plain'
            caption_doc.tags['id'] = caption

            yield image_doc
            yield caption_doc

        # The limit is only checked between batches, never mid-batch.
        if not num_docs:
            continue
        if (batch_index + 1) * batch_size >= num_docs:
            break
def doc_with_multimodal_chunks(embeddings):
    """Build a ``MultimodalDocument`` holding three chunks of distinct modality.

    The chunks are appended in the order ``'visual1'``, ``'visual2'``,
    ``'textual'`` and receive ``embeddings[0]``, ``embeddings[1]`` and
    ``embeddings[2]`` respectively.

    :param embeddings: indexable container with at least three embeddings.
    :return: the populated ``MultimodalDocument``.
    """
    parent = MultimodalDocument()
    for position, modality in enumerate(('visual1', 'visual2', 'textual')):
        piece = Document()
        piece.modality = modality
        # Explicit indexing keeps the failure on a too-short ``embeddings``.
        piece.embedding = embeddings[position]
        parent.chunks.append(piece)
    return parent
def doc_with_multimodal_chunks_wrong(embeddings):
    """Build a plain ``Document`` with three chunks, two sharing one modality.

    The parent is a ``Document`` (not a ``MultimodalDocument``) and the first
    two chunks both use the modality ``'visual'``; the third is ``'textual'``.
    Chunks receive ``embeddings[0]``, ``embeddings[1]`` and ``embeddings[2]``
    in that order. Presumably the repeated modality is what makes this
    document "wrong" for multimodal use -- confirm against the callers.

    :param embeddings: indexable container with at least three embeddings.
    :return: the populated ``Document``.
    """
    parent = Document()

    pieces = []
    for position, modality in enumerate(('visual', 'visual', 'textual')):
        piece = Document()
        piece.modality = modality
        piece.embedding = embeddings[position]
        pieces.append(piece)

    # Ids are refreshed once every field is set: chunks first, then the
    # parent, and only afterwards are the chunks attached to the parent.
    for piece in pieces:
        piece.update_id()
    parent.update_id()

    for piece in pieces:
        parent.chunks.append(piece)
    return parent
def chunk_4(textual_embedding):
    """Return a ``'textual'`` ``Document`` at granularity 1.

    :param textual_embedding: value stored in the document's ``embedding``.
    :return: the new ``Document``.
    """
    textual_doc = Document()
    textual_doc.modality = 'textual'
    textual_doc.embedding = textual_embedding
    textual_doc.granularity = 1
    return textual_doc
def chunk_2(textual_embedding):
    """Return a ``'textual'`` ``Document`` at granularity 0 carrying content.

    The argument is stored in ``content``, not in ``embedding``.
    NOTE(review): presumably intentional, to provide a chunk that has content
    instead of an embedding -- confirm against the callers.

    :param textual_embedding: value stored in the document's ``content``.
    :return: the new ``Document``.
    """
    textual_doc = Document()
    textual_doc.modality = 'textual'
    textual_doc.content = textual_embedding
    textual_doc.granularity = 0
    return textual_doc
def chunk_1(visual_embedding):
    """Return a ``'visual'`` ``Document`` at granularity 0.

    :param visual_embedding: value stored in the document's ``embedding``.
    :return: the new ``Document``.
    """
    visual_doc = Document()
    visual_doc.modality = 'visual'
    visual_doc.embedding = visual_embedding
    visual_doc.granularity = 0
    return visual_doc