def get_document_store(document_store_type, similarity='dot_product'):
    """Create a fresh document store of the requested kind.

    TODO: This helper is taken from test/conftest.py but maybe should live
    within Haystack itself, perhaps as a class method of DocStore that just
    takes a string naming the type of DocStore.

    Args:
        document_store_type: One of "sql", "memory", "elasticsearch",
            "milvus_flat", "milvus_hnsw", "faiss_flat" or "faiss_hnsw".
        similarity: Forwarded as ``similarity`` to the Elasticsearch, Milvus
            and FAISS stores; not used for "sql" or "memory".

    Returns:
        The newly constructed document store.

    Raises:
        Exception: If ``document_store_type`` is not one of the names above.
        AssertionError: If a freshly created SQL, Milvus or FAISS store
            reports a non-zero document count.

    Side effects:
        "sql" removes ``haystack_test.db`` from the working directory if it
        exists; "elasticsearch" deletes all indices matching
        ``haystack_test*``; the FAISS variants force-remove and re-create a
        docker container named ``haystack-postgres``.
    """
    if document_store_type == "sql":
        # Drop any leftover SQLite file so the store starts out empty.
        sqlite_file = "haystack_test.db"
        if os.path.exists(sqlite_file):
            os.remove(sqlite_file)
        store = SQLDocumentStore(url="sqlite:///haystack_test.db")
        assert store.get_document_count() == 0
        return store

    if document_store_type == "memory":
        return InMemoryDocumentStore()

    if document_store_type == "elasticsearch":
        # make sure we start from a fresh index
        # NOTE(review): only indices matching 'haystack_test*' are deleted,
        # while the store below uses "eval_document" - confirm this is intended.
        es_client = Elasticsearch()
        es_client.indices.delete(index='haystack_test*', ignore=[404])
        return ElasticsearchDocumentStore(
            index="eval_document",
            similarity=similarity,
            timeout=3000,
        )

    if document_store_type in ("milvus_flat", "milvus_hnsw"):
        # The flat index takes no tuning parameters; HNSW needs both
        # build-time and search-time settings.
        if document_store_type == "milvus_flat":
            milvus_kwargs = {
                "index_type": IndexType.FLAT,
                "index_param": None,
                "search_param": None,
            }
        else:
            milvus_kwargs = {
                "index_type": IndexType.HNSW,
                "index_param": {"M": 64, "efConstruction": 80},
                "search_param": {"ef": 20},
            }
        store = MilvusDocumentStore(similarity=similarity, **milvus_kwargs)
        assert store.get_document_count(index="eval_document") == 0
        return store

    if document_store_type in ("faiss_flat", "faiss_hnsw"):
        factory_str = "Flat" if document_store_type == "faiss_flat" else "HNSW"

        # Recreate the Postgres container backing the FAISS store. Each entry
        # is (shell command, seconds to wait afterwards); the waits are fixed
        # delays, not readiness checks.
        postgres_steps = (
            ('docker rm -f haystack-postgres', 1),
            ('docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres', 6),
            ('docker exec haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"', 1),
        )
        for shell_command, pause_seconds in postgres_steps:
            subprocess.run([shell_command], shell=True)
            time.sleep(pause_seconds)

        store = FAISSDocumentStore(
            sql_url="postgresql://*****:*****@localhost:5432/haystack",
            faiss_index_factory_str=factory_str,
            similarity=similarity,
        )
        assert store.get_document_count() == 0
        return store

    raise Exception(
        f"No document store fixture for '{document_store_type}'")
def get_document_store(document_store_type, es_similarity='cosine'):
    """Create a fresh, empty document store of the requested kind.

    TODO: This method is taken from test/conftest.py but maybe should be
    within Haystack. Perhaps a class method of DocStore that just takes a
    string for the type of DocStore.

    Args:
        document_store_type: One of "sql", "memory", "elasticsearch",
            "faiss_flat" or "faiss_hnsw".
        es_similarity: Forwarded as ``similarity`` to the Elasticsearch
            store only; not used by the other backends.

    Returns:
        The newly constructed document store.

    Raises:
        Exception: If ``document_store_type`` is not one of the names above.
        AssertionError: If the created store reports a non-zero document
            count.

    Side effects:
        "sql" removes ``haystack_test.db`` from the working directory if it
        exists; "elasticsearch" deletes all indices matching
        ``haystack_test*``; the FAISS variants force-remove and re-create a
        docker container named ``haystack-postgres``.
    """
    if document_store_type == "sql":
        # Drop any leftover SQLite file so the store starts out empty.
        if os.path.exists("haystack_test.db"):
            os.remove("haystack_test.db")
        document_store = SQLDocumentStore(url="sqlite:///haystack_test.db")
    elif document_store_type == "memory":
        document_store = InMemoryDocumentStore()
    elif document_store_type == "elasticsearch":
        # make sure we start from a fresh index
        # NOTE(review): only indices matching 'haystack_test*' are deleted,
        # while the store below uses "eval_document" - confirm this is
        # intended (the emptiness assert at the end will catch leftovers).
        client = Elasticsearch()
        client.indices.delete(index='haystack_test*', ignore=[404])
        document_store = ElasticsearchDocumentStore(index="eval_document",
                                                    similarity=es_similarity)
    elif document_store_type in ("faiss_flat", "faiss_hnsw"):
        if document_store_type == "faiss_flat":
            index_type = "Flat"
        else:
            index_type = "HNSW"

        # Launch a clean Postgres instance and create an empty DB in it.
        # The commands are passed as plain strings because shell=True expects
        # a string; wrapping them in a list only works by accident on POSIX.
        # Exit codes are deliberately not checked (best effort, as before).
        subprocess.run('docker rm -f haystack-postgres', shell=True)
        time.sleep(1)
        subprocess.run(
            'docker run --name haystack-postgres -p 5432:5432 -e POSTGRES_PASSWORD=password -d postgres',
            shell=True)
        # Fixed delay to let the server start; this is not a readiness check.
        time.sleep(3)
        # No "-it" here: requesting an interactive TTY makes docker fail with
        # "the input device is not a TTY" when no terminal is attached, which
        # would silently skip the database creation.
        subprocess.run(
            'docker exec haystack-postgres psql -U postgres -c "CREATE DATABASE haystack;"',
            shell=True)
        time.sleep(1)
        document_store = FAISSDocumentStore(
            sql_url="postgresql://*****:*****@localhost:5432/haystack",
            faiss_index_factory_str=index_type)
    else:
        raise Exception(
            f"No document store fixture for '{document_store_type}'")

    # Every backend must start out empty.
    assert document_store.get_document_count() == 0
    return document_store