def test_faiss_retrieving(index_factory):
    """Check embedding retrieval on a FAISS store built from a factory string.

    The store is cleared, filled with ``DOCUMENTS`` and queried through an
    ``EmbeddingRetriever``; the test asserts that the retriever returns as
    many results as there are entries in ``DOCUMENTS`` and that the first
    result is a ``Document``.

    Args:
        index_factory: Value forwarded to ``FAISSDocumentStore`` as
            ``faiss_index_factory_str``. Presumably supplied by a
            parametrization or fixture outside this block -- TODO confirm.
    """
    document_store = FAISSDocumentStore(
        sql_url="sqlite:///haystack_test_faiss.db",
        faiss_index_factory_str=index_factory,
    )
    # Clear the "document" index before writing the test documents.
    document_store.delete_all_documents(index="document")

    # Factory strings that mention IVF get a training pass before any
    # documents are written.
    if "ivf" in index_factory.lower():
        document_store.train_index(DOCUMENTS)
    document_store.write_documents(DOCUMENTS)

    retriever = EmbeddingRetriever(
        document_store=document_store,
        embedding_model="deepset/sentence_bert",
        use_gpu=False,
    )
    result = retriever.retrieve(query="How to test this?")

    assert len(result) == len(DOCUMENTS)
    # isinstance instead of an exact type comparison, so that subclasses of
    # Document are accepted as well.
    assert isinstance(result[0], Document)
def test_faiss_passing_index_from_outside():
    """Hand a pre-built FAISS IVF index to the store and verify its documents.

    An ``IndexIVFFlat`` is constructed directly with faiss, passed to
    ``FAISSDocumentStore`` through ``faiss_index``, trained, and filled with
    ``DOCUMENTS``. The documents read back from the store are then compared
    against ``DOCUMENTS`` with ``check_data_correctness``.

    NOTE(review): a second function with this exact name appears later in the
    visible source. If both live in the same module, the later definition
    replaces this one and this test is never collected -- confirm and rename
    one of them if so.
    """
    index_name = "document"
    dim = 768
    num_lists = 2

    coarse_quantizer = faiss.IndexFlatIP(dim)
    ivf_index = faiss.IndexIVFFlat(
        coarse_quantizer, dim, num_lists, faiss.METRIC_INNER_PRODUCT
    )
    ivf_index.nprobe = 2

    store = FAISSDocumentStore(
        sql_url="sqlite:///haystack_test_faiss.db",
        faiss_index=ivf_index,
    )
    store.delete_all_documents(index=index_name)

    # An IVF index has to be trained before documents can be added to it.
    store.train_index(DOCUMENTS)
    store.write_documents(documents=DOCUMENTS, index=index_name)

    # Read everything back and check it against the input documents.
    stored_docs = store.get_all_documents(index=index_name)
    check_data_correctness(stored_docs, DOCUMENTS)
def test_faiss_passing_index_from_outside():
    """Hand a pre-built FAISS IVF index to the store and check vector ids.

    An ``IndexIVFFlat`` with a hashtable direct map is constructed directly
    with faiss, passed to ``FAISSDocumentStore`` through ``faiss_index``,
    trained, and filled with ``DOCUMENTS``. Every document read back must
    carry a ``vector_id`` meta entry whose integer value lies in ``[0, 7]``.

    NOTE(review): an earlier function with this exact name is visible in the
    source. If both live in the same module, this definition shadows the
    earlier one -- confirm and rename one of them if so.
    """
    index_name = "document"
    dim = 768
    num_lists = 2

    coarse_quantizer = faiss.IndexFlatIP(dim)
    ivf_index = faiss.IndexIVFFlat(
        coarse_quantizer, dim, num_lists, faiss.METRIC_INNER_PRODUCT
    )
    # NOTE(review): presumably the hashtable direct map is what lets the
    # store address vectors by id on this IVF index -- confirm against FAISS.
    ivf_index.set_direct_map_type(faiss.DirectMap.Hashtable)
    ivf_index.nprobe = 2

    store = FAISSDocumentStore(
        sql_url="sqlite:///haystack_test_faiss.db",
        faiss_index=ivf_index,
    )
    store.delete_all_documents(index=index_name)

    # An IVF index has to be trained before documents can be added to it.
    store.train_index(DOCUMENTS)
    store.write_documents(documents=DOCUMENTS, index=index_name)

    # Each stored document must be linked to a vector id in the expected range.
    for stored_doc in store.get_all_documents(index=index_name):
        vector_id = int(stored_doc.meta["vector_id"])
        assert 0 <= vector_id <= 7